• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / sdfglib / 15049122732

15 May 2025 03:33PM UTC coverage: 62.241% (+4.7%) from 57.525%
15049122732

Pull #9

github

web-flow
Merge b96e33e0e into 9d3b1a2b3
Pull Request #9: Graphviz DOT Visualizer for SDFGs

520 of 542 new or added lines in 3 files covered. (95.94%)

782 existing lines in 68 files now uncovered.

8049 of 12932 relevant lines covered (62.24%)

504.09 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

60.7
/src/transformations/vectorization.cpp
1
#include "sdfg/transformations/vectorization.h"
2

3
#include "sdfg/types/utils.h"
4

5
namespace sdfg {
6
namespace transformations {
7

8
bool Vectorization::tasklet_supported(data_flow::TaskletCode c) {
8✔
9
    switch (c) {
8✔
10
        case data_flow::TaskletCode::assign:
11
            return true;
7✔
12
        case data_flow::TaskletCode::neg:
13
            return true;
×
14
        case data_flow::TaskletCode::add:
15
            return true;
1✔
16
        case data_flow::TaskletCode::sub:
17
            return true;
×
18
        case data_flow::TaskletCode::mul:
19
            return true;
×
20
        case data_flow::TaskletCode::div:
21
            return true;
×
22
        case data_flow::TaskletCode::fma:
23
            return true;
×
24
        case data_flow::TaskletCode::mod:
25
            return true;
×
26
        case data_flow::TaskletCode::abs:
27
            return true;
×
28
        case data_flow::TaskletCode::fabs:
29
            return true;
×
30
        case data_flow::TaskletCode::max:
31
            return true;
×
32
        case data_flow::TaskletCode::min:
33
            return true;
×
34
        case data_flow::TaskletCode::sqrt:
35
            return true;
×
36
        case data_flow::TaskletCode::sqrtf:
37
            return true;
×
38
        case data_flow::TaskletCode::sin:
39
            return true;
×
40
        case data_flow::TaskletCode::cos:
41
            return true;
×
42
        case data_flow::TaskletCode::tan:
43
            return true;
×
44
        case data_flow::TaskletCode::pow:
45
            return true;
×
46
        case data_flow::TaskletCode::exp:
47
            return true;
×
48
        case data_flow::TaskletCode::expf:
49
            return true;
×
50
        case data_flow::TaskletCode::exp2:
51
            return true;
×
52
        case data_flow::TaskletCode::log:
53
            return true;
×
54
        case data_flow::TaskletCode::log2:
55
            return true;
×
56
        case data_flow::TaskletCode::log10:
57
            return true;
×
58
        case data_flow::TaskletCode::floor:
59
            return true;
×
60
        case data_flow::TaskletCode::ceil:
61
            return true;
×
62
        case data_flow::TaskletCode::trunc:
63
            return true;
×
64
        case data_flow::TaskletCode::round:
65
            return true;
×
66
        case data_flow::TaskletCode::olt:
67
            return true;
×
68
        case data_flow::TaskletCode::ole:
69
            return true;
×
70
        case data_flow::TaskletCode::oeq:
71
            return true;
×
72
        case data_flow::TaskletCode::one:
73
            return true;
×
74
        case data_flow::TaskletCode::ogt:
75
            return true;
×
76
        case data_flow::TaskletCode::oge:
77
            return true;
×
78
        case data_flow::TaskletCode::bitwise_and:
79
            return true;
×
80
        case data_flow::TaskletCode::bitwise_or:
81
            return true;
×
82
        case data_flow::TaskletCode::bitwise_xor:
83
            return true;
×
84
        case data_flow::TaskletCode::bitwise_not:
85
            return true;
×
86
    }
87

88
    return false;
×
89
};
8✔
90

91
Vectorization::Vectorization(structured_control_flow::Sequence& parent,
6✔
92
                             structured_control_flow::For& loop)
93
    : loop_(loop) {
6✔
94

95
      };
6✔
96

97
std::string Vectorization::name() { return "Vectorization"; };
×
98

99
bool Vectorization::can_be_applied(Schedule& schedule) {
6✔
100
    auto& sdfg = schedule.builder().subject();
6✔
101
    auto& analysis_manager = schedule.analysis_manager();
6✔
102
    auto& body = this->loop_.root();
6✔
103

104
    // Section: CFG
105

106
    // Criterion: Body must only consists of blocks
107
    std::unordered_set<const data_flow::Tasklet*> tasklets;
6✔
108
    std::list<const structured_control_flow::ControlFlowNode*> queue = {&body};
6✔
109
    while (!queue.empty()) {
20✔
110
        auto node = queue.front();
14✔
111
        queue.pop_front();
14✔
112

113
        if (auto block = dynamic_cast<const structured_control_flow::Block*>(node)) {
14✔
114
            for (auto& child : block->dataflow().nodes()) {
33✔
115
                if (auto tasklet = dynamic_cast<const data_flow::Tasklet*>(&child)) {
25✔
116
                    tasklets.insert(tasklet);
8✔
117
                }
8✔
118
            }
119
        } else if (auto sequence = dynamic_cast<const structured_control_flow::Sequence*>(node)) {
14✔
120
            for (size_t i = 0; i < sequence->size(); i++) {
14✔
121
                if (sequence->at(i).second.assignments().size() > 0) {
8✔
122
                    return false;
×
123
                }
124
                queue.push_back(&sequence->at(i).first);
8✔
125
            }
8✔
126
        } else {
6✔
127
            return false;
×
128
        }
129
    }
130

131
    // Section: Operations
132

133
    // Criterion: All tasklets must be mappable to vector instructions
134
    for (auto& tasklet : tasklets) {
14✔
135
        if (!tasklet_supported(tasklet->code())) {
8✔
136
            return false;
×
137
        }
138

139
        // Criterion: All connectors must have same type
140
        types::PrimitiveType operation_type = types::PrimitiveType::Void;
8✔
141
        for (auto& inp : tasklet->inputs()) {
17✔
142
            const types::Scalar& type = inp.second;
9✔
143
            if (operation_type == types::PrimitiveType::Void) {
9✔
144
                operation_type = type.primitive_type();
8✔
145
            } else if (operation_type != type.primitive_type()) {
9✔
146
                return false;
×
147
            }
148
        }
149
    }
150

151
    // Section: Memory accesses
152

153
    // Criterion: Loop is contiguous
154
    auto indvar = this->loop_.indvar();
6✔
155
    if (!analysis::DataParallelismAnalysis::is_contiguous(this->loop_)) {
6✔
156
        return false;
×
157
    }
158

159
    // Criterion: No pointer operations
160
    auto& all_users = analysis_manager.get<analysis::Users>();
6✔
161
    analysis::UsersView users(all_users, body);
6✔
162
    if (!users.views().empty() || !users.moves().empty()) {
6✔
163
        return false;
×
164
    }
165

166
    // Determine moving symbols, i.e., pseudo loop iterators
167
    std::unordered_set<std::string> moving_symbols;
6✔
168
    auto write_subset = users.write_subsets();
6✔
169
    for (auto& entry : write_subset) {
14✔
170
        auto& data = entry.first;
8✔
171
        auto& type = sdfg.type(data);
8✔
172
        if (!dynamic_cast<const types::Scalar*>(&type)) {
8✔
173
            continue;
5✔
174
        }
175
        if (!types::is_integer(type.primitive_type())) {
3✔
176
            continue;
1✔
177
        }
178
        moving_symbols.insert(data);
2✔
179
    }
180

181
    size_t bit_width = 0;
6✔
182

183
    // Criterion: All memory accesses must be vectorizable
184
    auto& analysis = analysis_manager.get<analysis::DataParallelismAnalysis>();
6✔
185
    auto& dependencies = analysis.get(this->loop_);
6✔
186
    if (dependencies.size() == 0) {
6✔
187
        return false;
×
188
    }
189

190
    auto read_subset = users.read_subsets();
6✔
191
    for (auto& dep : dependencies) {
14✔
192
        auto& container = dep.first;
9✔
193
        auto& type = sdfg.type(container);
9✔
194
        auto& dep_type = dep.second;
9✔
195

196
        // Criterion: Non-parallel accesses must be valid scatter operations
197
        if (dep_type == analysis::Parallelism::DEPENDENT) {
9✔
198
            return false;
1✔
199
        }
200

201
        // Criterion: All accesses are vectorizable and have matching bit width
202
        for (auto& subset : read_subset[container]) {
16✔
203
            auto access_type = classify_access(subset, indvar, moving_symbols);
8✔
204
            if (access_type == AccessType::UNKNOWN) {
8✔
205
                return false;
×
206
            }
207

208
            // Symbolic expressions are casted to the right bit width
209
            if (type.is_symbol() && types::is_integer(type.primitive_type())) {
8✔
210
                continue;
3✔
211
            }
212

213
            auto& inferred_type = types::infer_type(sdfg, type, subset);
5✔
214
            auto primitive_type = inferred_type.primitive_type();
5✔
215
            if (primitive_type != types::PrimitiveType::Bool) {
5✔
216
                auto inferred_bit_width = types::bit_width(primitive_type);
5✔
217
                if (bit_width == 0) {
5✔
218
                    bit_width = inferred_bit_width;
4✔
219
                } else if (bit_width != inferred_bit_width) {
5✔
220
                    return false;
×
221
                }
222
            }
5✔
223
        }
224
        for (auto& subset : write_subset[container]) {
14✔
225
            auto access_type = classify_access(subset, indvar, moving_symbols);
6✔
226
            if (access_type == AccessType::UNKNOWN) {
6✔
227
                return false;
×
228
            }
229

230
            auto& inferred_type = types::infer_type(sdfg, type, subset);
6✔
231
            auto primitive_type = inferred_type.primitive_type();
6✔
232
            if (primitive_type != types::PrimitiveType::Bool) {
6✔
233
                auto inferred_bit_width = types::bit_width(primitive_type);
6✔
234
                if (bit_width == 0) {
6✔
235
                    bit_width = inferred_bit_width;
1✔
236
                } else if (bit_width != inferred_bit_width) {
6✔
237
                    return false;
×
238
                }
239
            }
6✔
240
        }
241

242
        // Criterion: No inter-loop dependencies
243

244
        // Readonly, private and parallel inter-loop dependencies are trivially safe
245
        if (dep_type == analysis::Parallelism::READONLY ||
13✔
246
            dep_type == analysis::Parallelism::PRIVATE ||
6✔
247
            dep_type == analysis::Parallelism::PARALLEL) {
5✔
248
            continue;
6✔
249
        }
250

251
        // Criterion: Reductions must be supported operations
252
        if (dep_type == analysis::Parallelism::REDUCTION) {
2✔
253
            // Find reduction template
254
            auto writes = users.writes(container);
2✔
255
            if (writes.size() != 1) {
2✔
256
                return false;
×
257
            }
258
            auto element = writes[0]->element();
2✔
259
            if (!dynamic_cast<const data_flow::AccessNode*>(element)) {
2✔
260
                return false;
×
261
            }
262
            auto access_node = static_cast<const data_flow::AccessNode*>(element);
2✔
263
            auto& graph = access_node->get_parent();
2✔
264
            auto& edge = *graph.in_edges(*access_node).begin();
2✔
265
            auto& tasklet = static_cast<const data_flow::Tasklet&>(edge.src());
2✔
266
            if (tasklet.is_conditional()) {
2✔
267
                return false;
×
268
            }
269

270
            auto reads = users.reads(container);
2✔
271
            if (reads.size() == 0) {
2✔
272
                continue;
×
273
            }
274
            if (reads.size() > 1) {
2✔
275
                return false;
×
276
            }
277

278
            if (tasklet.code() != data_flow::TaskletCode::add &&
3✔
279
                tasklet.code() != data_flow::TaskletCode::min &&
1✔
280
                tasklet.code() != data_flow::TaskletCode::max &&
1✔
281
                tasklet.code() != data_flow::TaskletCode::fma &&
1✔
282
                tasklet.code() != data_flow::TaskletCode::assign) {
1✔
283
                return false;
×
284
            }
285
            if (tasklet.code() == data_flow::TaskletCode::fma) {
2✔
286
                // Must be the additive part
287
                for (auto& iedge : graph.in_edges(tasklet)) {
×
288
                    if (iedge.dst_conn() == tasklet.input(2).first) {
×
289
                        auto& read_node = dynamic_cast<const data_flow::AccessNode&>(iedge.src());
×
290
                        if (read_node.data() != container) {
×
291
                            return false;
×
292
                        }
UNCOV
293
                    }
×
294
                }
UNCOV
295
            } else {
×
296
                bool found_input = false;
2✔
297
                for (auto& iedge : graph.in_edges(tasklet)) {
3✔
298
                    auto& read_node = dynamic_cast<const data_flow::AccessNode&>(iedge.src());
3✔
299
                    if (read_node.data() == container) {
3✔
300
                        found_input = true;
2✔
301
                        break;
2✔
302
                    }
303
                }
304
                if (!found_input) {
2✔
305
                    return false;
×
306
                }
307
            }
308
        }
2✔
309
    }
310
    if (bit_width != 8 && bit_width != 16 && bit_width != 32 && bit_width != 64) {
5✔
311
        return false;
×
312
    }
313

314
    return true;
5✔
315
};
6✔
316

317
void Vectorization::apply(Schedule& schedule) {
×
318
    // Set allocation lifetime of loop-local variables
319
    auto& builder = schedule.builder();
×
320
    auto& sdfg = builder.subject();
×
321
    auto& analysis_manager = schedule.analysis_manager();
×
322

323
    auto& analysis = analysis_manager.get<analysis::DataParallelismAnalysis>();
×
324
    auto& dependencies = analysis.get(this->loop_);
×
325
    for (auto& dep : dependencies) {
×
326
        auto& container = dep.first;
×
327
        auto& type = sdfg.type(container);
×
328
        auto& dep_type = dep.second;
×
329

330
        if (dep_type == analysis::Parallelism::PRIVATE) {
×
331
            schedule.allocation_lifetime(container, &this->loop_.root());
×
UNCOV
332
        }
×
333
    }
334

335
    std::string indvar = this->loop_.indvar()->get_name();
×
336
    schedule.allocation_lifetime(indvar, &this->loop_);
×
337

338
    schedule.loop_schedule(&this->loop_, LoopSchedule::VECTORIZATION);
×
339
};
×
340

341
AccessType Vectorization::classify_access(const data_flow::Subset& subset,
14✔
342
                                          const symbolic::Symbol& indvar,
343
                                          const std::unordered_set<std::string>& moving_symbols) {
344
    // Leading dimensions must be constant
345
    for (size_t i = 0; i < subset.size() - 1; i++) {
14✔
346
        auto& dim = subset[i];
×
347
        if (symbolic::uses(dim, indvar->get_name())) {
×
348
            return AccessType::UNKNOWN;
×
349
        }
350
        for (auto& scalar : moving_symbols) {
×
351
            if (symbolic::uses(dim, scalar)) {
×
352
                return AccessType::UNKNOWN;
×
353
            }
354
        }
UNCOV
355
    }
×
356

357
    // Three supported cases: Contiguous, Indirection, Constant
358
    auto& last_dim = subset.back();
14✔
359

360
    // Case: INDIRECTION
361
    symbolic::Symbol gather_variable = SymEngine::null;
14✔
362
    for (auto& scalar : moving_symbols) {
19✔
363
        if (symbolic::uses(last_dim, scalar)) {
5✔
364
            assert(gather_variable == SymEngine::null);
1✔
365
            gather_variable = symbolic::symbol(scalar);
1✔
366
        }
1✔
367
    }
368
    if (gather_variable != SymEngine::null) {
14✔
369
        return AccessType::INDIRECTION;
1✔
370
    }
371

372
    // Case: Contiguous
373
    if (symbolic::uses(last_dim, indvar->get_name())) {
13✔
374
        auto match = symbolic::affine(last_dim, indvar);
7✔
375
        if (match.first == SymEngine::null) {
7✔
376
            return AccessType::UNKNOWN;
×
377
        }
378
        if (!symbolic::eq(match.first, symbolic::integer(1))) {
7✔
379
            return AccessType::UNKNOWN;
×
380
        }
381
        return AccessType::CONTIGUOUS;
7✔
382
    }
7✔
383

384
    // Case: Constant
385
    return AccessType::CONSTANT;
6✔
386
};
14✔
387

388
}  // namespace transformations
389
}  // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc