• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / sdfglib / 15512041711

07 Jun 2025 09:59PM UTC coverage: 57.416% (+0.1%) from 57.315%
15512041711

push

github

web-flow
Merge pull request #44 from daisytuner/StructuredLoops

Add Structured Loops

51 of 102 new or added lines in 20 files covered. (50.0%)

10 existing lines in 8 files now uncovered.

7618 of 13268 relevant lines covered (57.42%)

116.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/transformations/vectorization.cpp
1
#include "sdfg/transformations/vectorization.h"
2

3
#include "sdfg/analysis/data_parallelism_analysis.h"
4
#include "sdfg/types/utils.h"
5

6
namespace sdfg {
7
namespace transformations {
8

9
bool Vectorization::tasklet_supported(data_flow::TaskletCode c) {
×
10
    switch (c) {
×
11
        case data_flow::TaskletCode::assign:
12
            return true;
×
13
        case data_flow::TaskletCode::neg:
14
            return true;
×
15
        case data_flow::TaskletCode::add:
16
            return true;
×
17
        case data_flow::TaskletCode::sub:
18
            return true;
×
19
        case data_flow::TaskletCode::mul:
20
            return true;
×
21
        case data_flow::TaskletCode::div:
22
            return true;
×
23
        case data_flow::TaskletCode::fma:
24
            return true;
×
25
        case data_flow::TaskletCode::mod:
26
            return true;
×
27
        case data_flow::TaskletCode::abs:
28
            return true;
×
29
        case data_flow::TaskletCode::fabs:
30
            return true;
×
31
        case data_flow::TaskletCode::max:
32
            return true;
×
33
        case data_flow::TaskletCode::min:
34
            return true;
×
35
        case data_flow::TaskletCode::sqrt:
36
            return true;
×
37
        case data_flow::TaskletCode::sqrtf:
38
            return true;
×
39
        case data_flow::TaskletCode::sin:
40
            return true;
×
41
        case data_flow::TaskletCode::cos:
42
            return true;
×
43
        case data_flow::TaskletCode::tan:
44
            return true;
×
45
        case data_flow::TaskletCode::pow:
46
            return true;
×
47
        case data_flow::TaskletCode::exp:
48
            return true;
×
49
        case data_flow::TaskletCode::expf:
50
            return true;
×
51
        case data_flow::TaskletCode::exp2:
52
            return true;
×
53
        case data_flow::TaskletCode::log:
54
            return true;
×
55
        case data_flow::TaskletCode::log2:
56
            return true;
×
57
        case data_flow::TaskletCode::log10:
58
            return true;
×
59
        case data_flow::TaskletCode::floor:
60
            return true;
×
61
        case data_flow::TaskletCode::ceil:
62
            return true;
×
63
        case data_flow::TaskletCode::trunc:
64
            return true;
×
65
        case data_flow::TaskletCode::round:
66
            return true;
×
67
        case data_flow::TaskletCode::olt:
68
            return true;
×
69
        case data_flow::TaskletCode::ole:
70
            return true;
×
71
        case data_flow::TaskletCode::oeq:
72
            return true;
×
73
        case data_flow::TaskletCode::one:
74
            return true;
×
75
        case data_flow::TaskletCode::ogt:
76
            return true;
×
77
        case data_flow::TaskletCode::oge:
78
            return true;
×
79
        case data_flow::TaskletCode::bitwise_and:
80
            return true;
×
81
        case data_flow::TaskletCode::bitwise_or:
82
            return true;
×
83
        case data_flow::TaskletCode::bitwise_xor:
84
            return true;
×
85
        case data_flow::TaskletCode::bitwise_not:
86
            return true;
×
87
        default:
NEW
88
            return false;
×
89
    }
UNCOV
90
};
×
91

92
Vectorization::Vectorization(structured_control_flow::Sequence& parent,
×
93
                             structured_control_flow::StructuredLoop& loop)
94
    : loop_(loop) {
×
95

96
      };
×
97

98
std::string Vectorization::name() { return "Vectorization"; };
×
99

100
bool Vectorization::can_be_applied(Schedule& schedule) {
×
101
    auto& sdfg = schedule.builder().subject();
×
102
    auto& analysis_manager = schedule.analysis_manager();
×
103
    auto& body = this->loop_.root();
×
104

105
    // Section: CFG
106

107
    // Criterion: Body must only consists of blocks
108
    std::unordered_set<const data_flow::Tasklet*> tasklets;
×
109
    std::list<const structured_control_flow::ControlFlowNode*> queue = {&body};
×
110
    while (!queue.empty()) {
×
111
        auto node = queue.front();
×
112
        queue.pop_front();
×
113

114
        if (auto block = dynamic_cast<const structured_control_flow::Block*>(node)) {
×
115
            for (auto& child : block->dataflow().nodes()) {
×
116
                if (auto tasklet = dynamic_cast<const data_flow::Tasklet*>(&child)) {
×
117
                    tasklets.insert(tasklet);
×
118
                }
×
119
            }
120
        } else if (auto sequence = dynamic_cast<const structured_control_flow::Sequence*>(node)) {
×
121
            for (size_t i = 0; i < sequence->size(); i++) {
×
122
                if (sequence->at(i).second.assignments().size() > 0) {
×
123
                    return false;
×
124
                }
125
                queue.push_back(&sequence->at(i).first);
×
126
            }
×
127
        } else {
×
128
            return false;
×
129
        }
130
    }
131

132
    // Section: Operations
133

134
    // Criterion: All tasklets must be mappable to vector instructions
135
    for (auto& tasklet : tasklets) {
×
136
        if (!tasklet_supported(tasklet->code())) {
×
137
            return false;
×
138
        }
139

140
        // Criterion: All connectors must have same type
141
        types::PrimitiveType operation_type = types::PrimitiveType::Void;
×
142
        for (auto& inp : tasklet->inputs()) {
×
143
            const types::Scalar& type = inp.second;
×
144
            if (operation_type == types::PrimitiveType::Void) {
×
145
                operation_type = type.primitive_type();
×
146
            } else if (operation_type != type.primitive_type()) {
×
147
                return false;
×
148
            }
149
        }
150
    }
151

152
    // Section: Memory accesses
153

154
    // Criterion: Loop is contiguous
155
    auto indvar = this->loop_.indvar();
×
156
    if (!analysis::DataParallelismAnalysis::is_contiguous(this->loop_)) {
×
157
        return false;
×
158
    }
159

160
    // Criterion: No pointer operations
161
    auto& all_users = analysis_manager.get<analysis::Users>();
×
162
    analysis::UsersView users(all_users, body);
×
163
    if (!users.views().empty() || !users.moves().empty()) {
×
164
        return false;
×
165
    }
166

167
    // Determine moving symbols, i.e., pseudo loop iterators
168
    std::unordered_set<std::string> moving_symbols;
×
169
    auto write_subset = users.write_subsets();
×
170
    for (auto& entry : write_subset) {
×
171
        auto& data = entry.first;
×
172
        auto& type = sdfg.type(data);
×
173
        if (!dynamic_cast<const types::Scalar*>(&type)) {
×
174
            continue;
×
175
        }
176
        if (!types::is_integer(type.primitive_type())) {
×
177
            continue;
×
178
        }
179
        moving_symbols.insert(data);
×
180
    }
181

182
    size_t bit_width = 0;
×
183

184
    // Criterion: All memory accesses must be vectorizable
185
    auto& analysis = analysis_manager.get<analysis::DataParallelismAnalysis>();
×
186
    auto& dependencies = analysis.get(this->loop_);
×
187
    if (dependencies.size() == 0) {
×
188
        return false;
×
189
    }
190

191
    auto read_subset = users.read_subsets();
×
192
    for (auto& dep : dependencies) {
×
193
        auto& container = dep.first;
×
194
        auto& type = sdfg.type(container);
×
195
        auto& dep_type = dep.second;
×
196

197
        // Criterion: Non-parallel accesses must be valid scatter operations
198
        if (dep_type == analysis::Parallelism::DEPENDENT) {
×
199
            return false;
×
200
        }
201

202
        // Criterion: All accesses are vectorizable and have matching bit width
203
        for (auto& subset : read_subset[container]) {
×
204
            auto access_type = classify_access(subset, indvar, moving_symbols);
×
205
            if (access_type == AccessType::UNKNOWN) {
×
206
                return false;
×
207
            }
208

209
            // Symbolic expressions are casted to the right bit width
210
            if (type.is_symbol() && types::is_integer(type.primitive_type())) {
×
211
                continue;
×
212
            }
213

214
            auto& inferred_type = types::infer_type(sdfg, type, subset);
×
215
            auto primitive_type = inferred_type.primitive_type();
×
216
            if (primitive_type != types::PrimitiveType::Bool) {
×
217
                auto inferred_bit_width = types::bit_width(primitive_type);
×
218
                if (bit_width == 0) {
×
219
                    bit_width = inferred_bit_width;
×
220
                } else if (bit_width != inferred_bit_width) {
×
221
                    return false;
×
222
                }
223
            }
×
224
        }
225
        for (auto& subset : write_subset[container]) {
×
226
            auto access_type = classify_access(subset, indvar, moving_symbols);
×
227
            if (access_type == AccessType::UNKNOWN) {
×
228
                return false;
×
229
            }
230

231
            auto& inferred_type = types::infer_type(sdfg, type, subset);
×
232
            auto primitive_type = inferred_type.primitive_type();
×
233
            if (primitive_type != types::PrimitiveType::Bool) {
×
234
                auto inferred_bit_width = types::bit_width(primitive_type);
×
235
                if (bit_width == 0) {
×
236
                    bit_width = inferred_bit_width;
×
237
                } else if (bit_width != inferred_bit_width) {
×
238
                    return false;
×
239
                }
240
            }
×
241
        }
242

243
        // Criterion: No inter-loop dependencies
244

245
        // Readonly, private and parallel inter-loop dependencies are trivially safe
246
        if (dep_type == analysis::Parallelism::READONLY ||
×
247
            dep_type == analysis::Parallelism::PRIVATE ||
×
248
            dep_type == analysis::Parallelism::PARALLEL) {
×
249
            continue;
×
250
        }
251

252
        // Criterion: Reductions must be supported operations
253
        if (dep_type == analysis::Parallelism::REDUCTION) {
×
254
            // Find reduction template
255
            auto writes = users.writes(container);
×
256
            if (writes.size() != 1) {
×
257
                return false;
×
258
            }
259
            auto element = writes[0]->element();
×
260
            if (!dynamic_cast<const data_flow::AccessNode*>(element)) {
×
261
                return false;
×
262
            }
263
            auto access_node = static_cast<const data_flow::AccessNode*>(element);
×
264
            auto& graph = access_node->get_parent();
×
265
            auto& edge = *graph.in_edges(*access_node).begin();
×
266
            auto& tasklet = static_cast<const data_flow::Tasklet&>(edge.src());
×
267
            if (tasklet.is_conditional()) {
×
268
                return false;
×
269
            }
270

271
            auto reads = users.reads(container);
×
272
            if (reads.size() == 0) {
×
273
                continue;
×
274
            }
275
            if (reads.size() > 1) {
×
276
                return false;
×
277
            }
278

279
            if (tasklet.code() != data_flow::TaskletCode::add &&
×
280
                tasklet.code() != data_flow::TaskletCode::min &&
×
281
                tasklet.code() != data_flow::TaskletCode::max &&
×
282
                tasklet.code() != data_flow::TaskletCode::fma &&
×
283
                tasklet.code() != data_flow::TaskletCode::assign) {
×
284
                return false;
×
285
            }
286
            if (tasklet.code() == data_flow::TaskletCode::fma) {
×
287
                // Must be the additive part
288
                for (auto& iedge : graph.in_edges(tasklet)) {
×
289
                    if (iedge.dst_conn() == tasklet.input(2).first) {
×
290
                        auto& read_node = dynamic_cast<const data_flow::AccessNode&>(iedge.src());
×
291
                        if (read_node.data() != container) {
×
292
                            return false;
×
293
                        }
294
                    }
×
295
                }
296
            } else {
×
297
                bool found_input = false;
×
298
                for (auto& iedge : graph.in_edges(tasklet)) {
×
299
                    auto& read_node = dynamic_cast<const data_flow::AccessNode&>(iedge.src());
×
300
                    if (read_node.data() == container) {
×
301
                        found_input = true;
×
302
                        break;
×
303
                    }
304
                }
305
                if (!found_input) {
×
306
                    return false;
×
307
                }
308
            }
309
        }
×
310
    }
311
    if (bit_width != 8 && bit_width != 16 && bit_width != 32 && bit_width != 64) {
×
312
        return false;
×
313
    }
314

315
    return true;
×
316
};
×
317

318
void Vectorization::apply(Schedule& schedule) {
×
319
    schedule.loop_schedule(&this->loop_, LoopSchedule::VECTORIZATION);
×
320
};
×
321

322
AccessType Vectorization::classify_access(const data_flow::Subset& subset,
×
323
                                          const symbolic::Symbol& indvar,
324
                                          const std::unordered_set<std::string>& moving_symbols) {
325
    // Leading dimensions must be constant
326
    for (size_t i = 0; i < subset.size() - 1; i++) {
×
327
        auto& dim = subset[i];
×
328
        if (symbolic::uses(dim, indvar->get_name())) {
×
329
            return AccessType::UNKNOWN;
×
330
        }
331
        for (auto& scalar : moving_symbols) {
×
332
            if (symbolic::uses(dim, scalar)) {
×
333
                return AccessType::UNKNOWN;
×
334
            }
335
        }
336
    }
×
337

338
    // Three supported cases: Contiguous, Indirection, Constant
339
    auto& last_dim = subset.back();
×
340

341
    // Case: INDIRECTION
342
    symbolic::Symbol gather_variable = SymEngine::null;
×
343
    for (auto& scalar : moving_symbols) {
×
344
        if (symbolic::uses(last_dim, scalar)) {
×
345
            assert(gather_variable == SymEngine::null);
×
346
            gather_variable = symbolic::symbol(scalar);
×
347
        }
×
348
    }
349
    if (gather_variable != SymEngine::null) {
×
350
        return AccessType::INDIRECTION;
×
351
    }
352

353
    // Case: Contiguous
354
    if (symbolic::uses(last_dim, indvar->get_name())) {
×
355
        auto match = symbolic::affine(last_dim, indvar);
×
356
        if (match.first == SymEngine::null) {
×
357
            return AccessType::UNKNOWN;
×
358
        }
359
        if (!symbolic::eq(match.first, symbolic::integer(1))) {
×
360
            return AccessType::UNKNOWN;
×
361
        }
362
        return AccessType::CONTIGUOUS;
×
363
    }
×
364

365
    // Case: Constant
366
    return AccessType::CONSTANT;
×
367
};
×
368

369
}  // namespace transformations
370
}  // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc