• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / sdfglib / 15262928007

26 May 2025 10:36PM UTC coverage: 58.284% (-2.0%) from 60.304%
15262928007

push

github

web-flow
Merge pull request #36 from daisytuner/sdfg-validation

Introduces new definition of memlets plus sanity checks

104 of 266 new or added lines in 6 files covered. (39.1%)

241 existing lines in 15 files now uncovered.

7908 of 13568 relevant lines covered (58.28%)

96.1 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/transformations/vectorization.cpp
1
#include "sdfg/transformations/vectorization.h"
2

3
#include "sdfg/types/utils.h"
4

5
namespace sdfg {
6
namespace transformations {
7

UNCOV
8
bool Vectorization::tasklet_supported(data_flow::TaskletCode c) {
×
UNCOV
9
    switch (c) {
×
10
        case data_flow::TaskletCode::assign:
UNCOV
11
            return true;
×
12
        case data_flow::TaskletCode::neg:
13
            return true;
×
14
        case data_flow::TaskletCode::add:
UNCOV
15
            return true;
×
16
        case data_flow::TaskletCode::sub:
17
            return true;
×
18
        case data_flow::TaskletCode::mul:
19
            return true;
×
20
        case data_flow::TaskletCode::div:
21
            return true;
×
22
        case data_flow::TaskletCode::fma:
23
            return true;
×
24
        case data_flow::TaskletCode::mod:
25
            return true;
×
26
        case data_flow::TaskletCode::abs:
27
            return true;
×
28
        case data_flow::TaskletCode::fabs:
29
            return true;
×
30
        case data_flow::TaskletCode::max:
31
            return true;
×
32
        case data_flow::TaskletCode::min:
33
            return true;
×
34
        case data_flow::TaskletCode::sqrt:
35
            return true;
×
36
        case data_flow::TaskletCode::sqrtf:
37
            return true;
×
38
        case data_flow::TaskletCode::sin:
39
            return true;
×
40
        case data_flow::TaskletCode::cos:
41
            return true;
×
42
        case data_flow::TaskletCode::tan:
43
            return true;
×
44
        case data_flow::TaskletCode::pow:
45
            return true;
×
46
        case data_flow::TaskletCode::exp:
47
            return true;
×
48
        case data_flow::TaskletCode::expf:
49
            return true;
×
50
        case data_flow::TaskletCode::exp2:
51
            return true;
×
52
        case data_flow::TaskletCode::log:
53
            return true;
×
54
        case data_flow::TaskletCode::log2:
55
            return true;
×
56
        case data_flow::TaskletCode::log10:
57
            return true;
×
58
        case data_flow::TaskletCode::floor:
59
            return true;
×
60
        case data_flow::TaskletCode::ceil:
61
            return true;
×
62
        case data_flow::TaskletCode::trunc:
63
            return true;
×
64
        case data_flow::TaskletCode::round:
65
            return true;
×
66
        case data_flow::TaskletCode::olt:
67
            return true;
×
68
        case data_flow::TaskletCode::ole:
69
            return true;
×
70
        case data_flow::TaskletCode::oeq:
71
            return true;
×
72
        case data_flow::TaskletCode::one:
73
            return true;
×
74
        case data_flow::TaskletCode::ogt:
75
            return true;
×
76
        case data_flow::TaskletCode::oge:
77
            return true;
×
78
        case data_flow::TaskletCode::bitwise_and:
79
            return true;
×
80
        case data_flow::TaskletCode::bitwise_or:
81
            return true;
×
82
        case data_flow::TaskletCode::bitwise_xor:
83
            return true;
×
84
        case data_flow::TaskletCode::bitwise_not:
85
            return true;
×
86
    }
87

88
    return false;
×
UNCOV
89
};
×
90

UNCOV
91
Vectorization::Vectorization(structured_control_flow::Sequence& parent,
×
92
                             structured_control_flow::For& loop)
UNCOV
93
    : loop_(loop) {
×
94

UNCOV
95
      };
×
96

97
std::string Vectorization::name() { return "Vectorization"; };
×
98

UNCOV
99
bool Vectorization::can_be_applied(Schedule& schedule) {
×
UNCOV
100
    auto& sdfg = schedule.builder().subject();
×
UNCOV
101
    auto& analysis_manager = schedule.analysis_manager();
×
UNCOV
102
    auto& body = this->loop_.root();
×
103

104
    // Section: CFG
105

106
    // Criterion: Body must only consists of blocks
UNCOV
107
    std::unordered_set<const data_flow::Tasklet*> tasklets;
×
UNCOV
108
    std::list<const structured_control_flow::ControlFlowNode*> queue = {&body};
×
UNCOV
109
    while (!queue.empty()) {
×
UNCOV
110
        auto node = queue.front();
×
UNCOV
111
        queue.pop_front();
×
112

UNCOV
113
        if (auto block = dynamic_cast<const structured_control_flow::Block*>(node)) {
×
UNCOV
114
            for (auto& child : block->dataflow().nodes()) {
×
UNCOV
115
                if (auto tasklet = dynamic_cast<const data_flow::Tasklet*>(&child)) {
×
UNCOV
116
                    tasklets.insert(tasklet);
×
UNCOV
117
                }
×
118
            }
UNCOV
119
        } else if (auto sequence = dynamic_cast<const structured_control_flow::Sequence*>(node)) {
×
UNCOV
120
            for (size_t i = 0; i < sequence->size(); i++) {
×
UNCOV
121
                if (sequence->at(i).second.assignments().size() > 0) {
×
122
                    return false;
×
123
                }
UNCOV
124
                queue.push_back(&sequence->at(i).first);
×
UNCOV
125
            }
×
UNCOV
126
        } else {
×
127
            return false;
×
128
        }
129
    }
130

131
    // Section: Operations
132

133
    // Criterion: All tasklets must be mappable to vector instructions
UNCOV
134
    for (auto& tasklet : tasklets) {
×
UNCOV
135
        if (!tasklet_supported(tasklet->code())) {
×
136
            return false;
×
137
        }
138

139
        // Criterion: All connectors must have same type
UNCOV
140
        types::PrimitiveType operation_type = types::PrimitiveType::Void;
×
UNCOV
141
        for (auto& inp : tasklet->inputs()) {
×
UNCOV
142
            const types::Scalar& type = inp.second;
×
UNCOV
143
            if (operation_type == types::PrimitiveType::Void) {
×
UNCOV
144
                operation_type = type.primitive_type();
×
UNCOV
145
            } else if (operation_type != type.primitive_type()) {
×
146
                return false;
×
147
            }
148
        }
149
    }
150

151
    // Section: Memory accesses
152

153
    // Criterion: Loop is contiguous
UNCOV
154
    auto indvar = this->loop_.indvar();
×
UNCOV
155
    if (!analysis::DataParallelismAnalysis::is_contiguous(this->loop_)) {
×
156
        return false;
×
157
    }
158

159
    // Criterion: No pointer operations
UNCOV
160
    auto& all_users = analysis_manager.get<analysis::Users>();
×
UNCOV
161
    analysis::UsersView users(all_users, body);
×
UNCOV
162
    if (!users.views().empty() || !users.moves().empty()) {
×
163
        return false;
×
164
    }
165

166
    // Determine moving symbols, i.e., pseudo loop iterators
UNCOV
167
    std::unordered_set<std::string> moving_symbols;
×
UNCOV
168
    auto write_subset = users.write_subsets();
×
UNCOV
169
    for (auto& entry : write_subset) {
×
UNCOV
170
        auto& data = entry.first;
×
UNCOV
171
        auto& type = sdfg.type(data);
×
UNCOV
172
        if (!dynamic_cast<const types::Scalar*>(&type)) {
×
UNCOV
173
            continue;
×
174
        }
UNCOV
175
        if (!types::is_integer(type.primitive_type())) {
×
UNCOV
176
            continue;
×
177
        }
UNCOV
178
        moving_symbols.insert(data);
×
179
    }
180

UNCOV
181
    size_t bit_width = 0;
×
182

183
    // Criterion: All memory accesses must be vectorizable
UNCOV
184
    auto& analysis = analysis_manager.get<analysis::DataParallelismAnalysis>();
×
UNCOV
185
    auto& dependencies = analysis.get(this->loop_);
×
UNCOV
186
    if (dependencies.size() == 0) {
×
187
        return false;
×
188
    }
189

UNCOV
190
    auto read_subset = users.read_subsets();
×
UNCOV
191
    for (auto& dep : dependencies) {
×
UNCOV
192
        auto& container = dep.first;
×
UNCOV
193
        auto& type = sdfg.type(container);
×
UNCOV
194
        auto& dep_type = dep.second;
×
195

196
        // Criterion: Non-parallel accesses must be valid scatter operations
UNCOV
197
        if (dep_type == analysis::Parallelism::DEPENDENT) {
×
UNCOV
198
            return false;
×
199
        }
200

201
        // Criterion: All accesses are vectorizable and have matching bit width
UNCOV
202
        for (auto& subset : read_subset[container]) {
×
UNCOV
203
            auto access_type = classify_access(subset, indvar, moving_symbols);
×
UNCOV
204
            if (access_type == AccessType::UNKNOWN) {
×
205
                return false;
×
206
            }
207

208
            // Symbolic expressions are casted to the right bit width
UNCOV
209
            if (type.is_symbol() && types::is_integer(type.primitive_type())) {
×
UNCOV
210
                continue;
×
211
            }
212

UNCOV
213
            auto& inferred_type = types::infer_type(sdfg, type, subset);
×
UNCOV
214
            auto primitive_type = inferred_type.primitive_type();
×
UNCOV
215
            if (primitive_type != types::PrimitiveType::Bool) {
×
UNCOV
216
                auto inferred_bit_width = types::bit_width(primitive_type);
×
UNCOV
217
                if (bit_width == 0) {
×
UNCOV
218
                    bit_width = inferred_bit_width;
×
UNCOV
219
                } else if (bit_width != inferred_bit_width) {
×
220
                    return false;
×
221
                }
UNCOV
222
            }
×
223
        }
UNCOV
224
        for (auto& subset : write_subset[container]) {
×
UNCOV
225
            auto access_type = classify_access(subset, indvar, moving_symbols);
×
UNCOV
226
            if (access_type == AccessType::UNKNOWN) {
×
227
                return false;
×
228
            }
229

UNCOV
230
            auto& inferred_type = types::infer_type(sdfg, type, subset);
×
UNCOV
231
            auto primitive_type = inferred_type.primitive_type();
×
UNCOV
232
            if (primitive_type != types::PrimitiveType::Bool) {
×
UNCOV
233
                auto inferred_bit_width = types::bit_width(primitive_type);
×
UNCOV
234
                if (bit_width == 0) {
×
UNCOV
235
                    bit_width = inferred_bit_width;
×
UNCOV
236
                } else if (bit_width != inferred_bit_width) {
×
237
                    return false;
×
238
                }
UNCOV
239
            }
×
240
        }
241

242
        // Criterion: No inter-loop dependencies
243

244
        // Readonly, private and parallel inter-loop dependencies are trivially safe
UNCOV
245
        if (dep_type == analysis::Parallelism::READONLY ||
×
UNCOV
246
            dep_type == analysis::Parallelism::PRIVATE ||
×
UNCOV
247
            dep_type == analysis::Parallelism::PARALLEL) {
×
UNCOV
248
            continue;
×
249
        }
250

251
        // Criterion: Reductions must be supported operations
UNCOV
252
        if (dep_type == analysis::Parallelism::REDUCTION) {
×
253
            // Find reduction template
UNCOV
254
            auto writes = users.writes(container);
×
UNCOV
255
            if (writes.size() != 1) {
×
256
                return false;
×
257
            }
UNCOV
258
            auto element = writes[0]->element();
×
UNCOV
259
            if (!dynamic_cast<const data_flow::AccessNode*>(element)) {
×
260
                return false;
×
261
            }
UNCOV
262
            auto access_node = static_cast<const data_flow::AccessNode*>(element);
×
UNCOV
263
            auto& graph = access_node->get_parent();
×
UNCOV
264
            auto& edge = *graph.in_edges(*access_node).begin();
×
UNCOV
265
            auto& tasklet = static_cast<const data_flow::Tasklet&>(edge.src());
×
UNCOV
266
            if (tasklet.is_conditional()) {
×
267
                return false;
×
268
            }
269

UNCOV
270
            auto reads = users.reads(container);
×
UNCOV
271
            if (reads.size() == 0) {
×
272
                continue;
×
273
            }
UNCOV
274
            if (reads.size() > 1) {
×
275
                return false;
×
276
            }
277

UNCOV
278
            if (tasklet.code() != data_flow::TaskletCode::add &&
×
UNCOV
279
                tasklet.code() != data_flow::TaskletCode::min &&
×
UNCOV
280
                tasklet.code() != data_flow::TaskletCode::max &&
×
UNCOV
281
                tasklet.code() != data_flow::TaskletCode::fma &&
×
UNCOV
282
                tasklet.code() != data_flow::TaskletCode::assign) {
×
283
                return false;
×
284
            }
UNCOV
285
            if (tasklet.code() == data_flow::TaskletCode::fma) {
×
286
                // Must be the additive part
287
                for (auto& iedge : graph.in_edges(tasklet)) {
×
288
                    if (iedge.dst_conn() == tasklet.input(2).first) {
×
289
                        auto& read_node = dynamic_cast<const data_flow::AccessNode&>(iedge.src());
×
290
                        if (read_node.data() != container) {
×
291
                            return false;
×
292
                        }
293
                    }
×
294
                }
295
            } else {
×
UNCOV
296
                bool found_input = false;
×
UNCOV
297
                for (auto& iedge : graph.in_edges(tasklet)) {
×
UNCOV
298
                    auto& read_node = dynamic_cast<const data_flow::AccessNode&>(iedge.src());
×
UNCOV
299
                    if (read_node.data() == container) {
×
UNCOV
300
                        found_input = true;
×
UNCOV
301
                        break;
×
302
                    }
303
                }
UNCOV
304
                if (!found_input) {
×
305
                    return false;
×
306
                }
307
            }
UNCOV
308
        }
×
309
    }
UNCOV
310
    if (bit_width != 8 && bit_width != 16 && bit_width != 32 && bit_width != 64) {
×
311
        return false;
×
312
    }
313

UNCOV
314
    return true;
×
UNCOV
315
};
×
316

317
void Vectorization::apply(Schedule& schedule) {
×
318
    // Set allocation lifetime of loop-local variables
319
    auto& analysis_manager = schedule.analysis_manager();
×
320

321
    auto& analysis = analysis_manager.get<analysis::DataParallelismAnalysis>();
×
322
    auto& dependencies = analysis.get(this->loop_);
×
323
    for (auto& dep : dependencies) {
×
324
        auto& container = dep.first;
×
325
        auto& dep_type = dep.second;
×
326

327
        if (dep_type == analysis::Parallelism::PRIVATE) {
×
328
            schedule.allocation_lifetime(container, &this->loop_.root());
×
329
        }
×
330
    }
331

332
    std::string indvar = this->loop_.indvar()->get_name();
×
333
    schedule.allocation_lifetime(indvar, &this->loop_);
×
334

335
    schedule.loop_schedule(&this->loop_, LoopSchedule::VECTORIZATION);
×
336
};
×
337

UNCOV
338
AccessType Vectorization::classify_access(const data_flow::Subset& subset,
×
339
                                          const symbolic::Symbol& indvar,
340
                                          const std::unordered_set<std::string>& moving_symbols) {
341
    // Leading dimensions must be constant
UNCOV
342
    for (size_t i = 0; i < subset.size() - 1; i++) {
×
343
        auto& dim = subset[i];
×
344
        if (symbolic::uses(dim, indvar->get_name())) {
×
345
            return AccessType::UNKNOWN;
×
346
        }
347
        for (auto& scalar : moving_symbols) {
×
348
            if (symbolic::uses(dim, scalar)) {
×
349
                return AccessType::UNKNOWN;
×
350
            }
351
        }
352
    }
×
353

354
    // Three supported cases: Contiguous, Indirection, Constant
UNCOV
355
    auto& last_dim = subset.back();
×
356

357
    // Case: INDIRECTION
UNCOV
358
    symbolic::Symbol gather_variable = SymEngine::null;
×
UNCOV
359
    for (auto& scalar : moving_symbols) {
×
UNCOV
360
        if (symbolic::uses(last_dim, scalar)) {
×
UNCOV
361
            assert(gather_variable == SymEngine::null);
×
UNCOV
362
            gather_variable = symbolic::symbol(scalar);
×
UNCOV
363
        }
×
364
    }
UNCOV
365
    if (gather_variable != SymEngine::null) {
×
UNCOV
366
        return AccessType::INDIRECTION;
×
367
    }
368

369
    // Case: Contiguous
UNCOV
370
    if (symbolic::uses(last_dim, indvar->get_name())) {
×
UNCOV
371
        auto match = symbolic::affine(last_dim, indvar);
×
UNCOV
372
        if (match.first == SymEngine::null) {
×
373
            return AccessType::UNKNOWN;
×
374
        }
UNCOV
375
        if (!symbolic::eq(match.first, symbolic::integer(1))) {
×
376
            return AccessType::UNKNOWN;
×
377
        }
UNCOV
378
        return AccessType::CONTIGUOUS;
×
UNCOV
379
    }
×
380

381
    // Case: Constant
UNCOV
382
    return AccessType::CONSTANT;
×
UNCOV
383
};
×
384

385
}  // namespace transformations
386
}  // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc