• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / sdfglib / 15455990921

05 Jun 2025 01:03AM UTC coverage: 57.883% (-0.4%) from 58.236%
15455990921

push

github

web-flow
Merge pull request #56 from daisytuner/allocations

removes allocation handling

15 of 36 new or added lines in 4 files covered. (41.67%)

27 existing lines in 4 files now uncovered.

8018 of 13852 relevant lines covered (57.88%)

107.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/transformations/vectorization.cpp
1
#include "sdfg/transformations/vectorization.h"
2

3
#include "sdfg/types/utils.h"
4

5
namespace sdfg {
6
namespace transformations {
7

8
bool Vectorization::tasklet_supported(data_flow::TaskletCode c) {
×
9
    switch (c) {
×
10
        case data_flow::TaskletCode::assign:
11
            return true;
×
12
        case data_flow::TaskletCode::neg:
13
            return true;
×
14
        case data_flow::TaskletCode::add:
15
            return true;
×
16
        case data_flow::TaskletCode::sub:
17
            return true;
×
18
        case data_flow::TaskletCode::mul:
19
            return true;
×
20
        case data_flow::TaskletCode::div:
21
            return true;
×
22
        case data_flow::TaskletCode::fma:
23
            return true;
×
24
        case data_flow::TaskletCode::mod:
25
            return true;
×
26
        case data_flow::TaskletCode::abs:
27
            return true;
×
28
        case data_flow::TaskletCode::fabs:
29
            return true;
×
30
        case data_flow::TaskletCode::max:
31
            return true;
×
32
        case data_flow::TaskletCode::min:
33
            return true;
×
34
        case data_flow::TaskletCode::sqrt:
35
            return true;
×
36
        case data_flow::TaskletCode::sqrtf:
37
            return true;
×
38
        case data_flow::TaskletCode::sin:
39
            return true;
×
40
        case data_flow::TaskletCode::cos:
41
            return true;
×
42
        case data_flow::TaskletCode::tan:
43
            return true;
×
44
        case data_flow::TaskletCode::pow:
45
            return true;
×
46
        case data_flow::TaskletCode::exp:
47
            return true;
×
48
        case data_flow::TaskletCode::expf:
49
            return true;
×
50
        case data_flow::TaskletCode::exp2:
51
            return true;
×
52
        case data_flow::TaskletCode::log:
53
            return true;
×
54
        case data_flow::TaskletCode::log2:
55
            return true;
×
56
        case data_flow::TaskletCode::log10:
57
            return true;
×
58
        case data_flow::TaskletCode::floor:
59
            return true;
×
60
        case data_flow::TaskletCode::ceil:
61
            return true;
×
62
        case data_flow::TaskletCode::trunc:
63
            return true;
×
64
        case data_flow::TaskletCode::round:
65
            return true;
×
66
        case data_flow::TaskletCode::olt:
67
            return true;
×
68
        case data_flow::TaskletCode::ole:
69
            return true;
×
70
        case data_flow::TaskletCode::oeq:
71
            return true;
×
72
        case data_flow::TaskletCode::one:
73
            return true;
×
74
        case data_flow::TaskletCode::ogt:
75
            return true;
×
76
        case data_flow::TaskletCode::oge:
77
            return true;
×
78
        case data_flow::TaskletCode::bitwise_and:
79
            return true;
×
80
        case data_flow::TaskletCode::bitwise_or:
81
            return true;
×
82
        case data_flow::TaskletCode::bitwise_xor:
83
            return true;
×
84
        case data_flow::TaskletCode::bitwise_not:
85
            return true;
×
86
    }
87

88
    return false;
×
89
};
×
90

91
Vectorization::Vectorization(structured_control_flow::Sequence& parent,
×
92
                             structured_control_flow::For& loop)
93
    : loop_(loop) {
×
94

95
      };
×
96

97
std::string Vectorization::name() { return "Vectorization"; };
×
98

99
bool Vectorization::can_be_applied(Schedule& schedule) {
×
100
    auto& sdfg = schedule.builder().subject();
×
101
    auto& analysis_manager = schedule.analysis_manager();
×
102
    auto& body = this->loop_.root();
×
103

104
    // Section: CFG
105

106
    // Criterion: Body must only consists of blocks
107
    std::unordered_set<const data_flow::Tasklet*> tasklets;
×
108
    std::list<const structured_control_flow::ControlFlowNode*> queue = {&body};
×
109
    while (!queue.empty()) {
×
110
        auto node = queue.front();
×
111
        queue.pop_front();
×
112

113
        if (auto block = dynamic_cast<const structured_control_flow::Block*>(node)) {
×
114
            for (auto& child : block->dataflow().nodes()) {
×
115
                if (auto tasklet = dynamic_cast<const data_flow::Tasklet*>(&child)) {
×
116
                    tasklets.insert(tasklet);
×
117
                }
×
118
            }
119
        } else if (auto sequence = dynamic_cast<const structured_control_flow::Sequence*>(node)) {
×
120
            for (size_t i = 0; i < sequence->size(); i++) {
×
121
                if (sequence->at(i).second.assignments().size() > 0) {
×
122
                    return false;
×
123
                }
124
                queue.push_back(&sequence->at(i).first);
×
125
            }
×
126
        } else {
×
127
            return false;
×
128
        }
129
    }
130

131
    // Section: Operations
132

133
    // Criterion: All tasklets must be mappable to vector instructions
134
    for (auto& tasklet : tasklets) {
×
135
        if (!tasklet_supported(tasklet->code())) {
×
136
            return false;
×
137
        }
138

139
        // Criterion: All connectors must have same type
140
        types::PrimitiveType operation_type = types::PrimitiveType::Void;
×
141
        for (auto& inp : tasklet->inputs()) {
×
142
            const types::Scalar& type = inp.second;
×
143
            if (operation_type == types::PrimitiveType::Void) {
×
144
                operation_type = type.primitive_type();
×
145
            } else if (operation_type != type.primitive_type()) {
×
146
                return false;
×
147
            }
148
        }
149
    }
150

151
    // Section: Memory accesses
152

153
    // Criterion: Loop is contiguous
154
    auto indvar = this->loop_.indvar();
×
155
    if (!analysis::DataParallelismAnalysis::is_contiguous(this->loop_)) {
×
156
        return false;
×
157
    }
158

159
    // Criterion: No pointer operations
160
    auto& all_users = analysis_manager.get<analysis::Users>();
×
161
    analysis::UsersView users(all_users, body);
×
162
    if (!users.views().empty() || !users.moves().empty()) {
×
163
        return false;
×
164
    }
165

166
    // Determine moving symbols, i.e., pseudo loop iterators
167
    std::unordered_set<std::string> moving_symbols;
×
168
    auto write_subset = users.write_subsets();
×
169
    for (auto& entry : write_subset) {
×
170
        auto& data = entry.first;
×
171
        auto& type = sdfg.type(data);
×
172
        if (!dynamic_cast<const types::Scalar*>(&type)) {
×
173
            continue;
×
174
        }
175
        if (!types::is_integer(type.primitive_type())) {
×
176
            continue;
×
177
        }
178
        moving_symbols.insert(data);
×
179
    }
180

181
    size_t bit_width = 0;
×
182

183
    // Criterion: All memory accesses must be vectorizable
184
    auto& analysis = analysis_manager.get<analysis::DataParallelismAnalysis>();
×
185
    auto& dependencies = analysis.get(this->loop_);
×
186
    if (dependencies.size() == 0) {
×
187
        return false;
×
188
    }
189

190
    auto read_subset = users.read_subsets();
×
191
    for (auto& dep : dependencies) {
×
192
        auto& container = dep.first;
×
193
        auto& type = sdfg.type(container);
×
194
        auto& dep_type = dep.second;
×
195

196
        // Criterion: Non-parallel accesses must be valid scatter operations
197
        if (dep_type == analysis::Parallelism::DEPENDENT) {
×
198
            return false;
×
199
        }
200

201
        // Criterion: All accesses are vectorizable and have matching bit width
202
        for (auto& subset : read_subset[container]) {
×
203
            auto access_type = classify_access(subset, indvar, moving_symbols);
×
204
            if (access_type == AccessType::UNKNOWN) {
×
205
                return false;
×
206
            }
207

208
            // Symbolic expressions are casted to the right bit width
209
            if (type.is_symbol() && types::is_integer(type.primitive_type())) {
×
210
                continue;
×
211
            }
212

213
            auto& inferred_type = types::infer_type(sdfg, type, subset);
×
214
            auto primitive_type = inferred_type.primitive_type();
×
215
            if (primitive_type != types::PrimitiveType::Bool) {
×
216
                auto inferred_bit_width = types::bit_width(primitive_type);
×
217
                if (bit_width == 0) {
×
218
                    bit_width = inferred_bit_width;
×
219
                } else if (bit_width != inferred_bit_width) {
×
220
                    return false;
×
221
                }
222
            }
×
223
        }
224
        for (auto& subset : write_subset[container]) {
×
225
            auto access_type = classify_access(subset, indvar, moving_symbols);
×
226
            if (access_type == AccessType::UNKNOWN) {
×
227
                return false;
×
228
            }
229

230
            auto& inferred_type = types::infer_type(sdfg, type, subset);
×
231
            auto primitive_type = inferred_type.primitive_type();
×
232
            if (primitive_type != types::PrimitiveType::Bool) {
×
233
                auto inferred_bit_width = types::bit_width(primitive_type);
×
234
                if (bit_width == 0) {
×
235
                    bit_width = inferred_bit_width;
×
236
                } else if (bit_width != inferred_bit_width) {
×
237
                    return false;
×
238
                }
239
            }
×
240
        }
241

242
        // Criterion: No inter-loop dependencies
243

244
        // Readonly, private and parallel inter-loop dependencies are trivially safe
245
        if (dep_type == analysis::Parallelism::READONLY ||
×
246
            dep_type == analysis::Parallelism::PRIVATE ||
×
247
            dep_type == analysis::Parallelism::PARALLEL) {
×
248
            continue;
×
249
        }
250

251
        // Criterion: Reductions must be supported operations
252
        if (dep_type == analysis::Parallelism::REDUCTION) {
×
253
            // Find reduction template
254
            auto writes = users.writes(container);
×
255
            if (writes.size() != 1) {
×
256
                return false;
×
257
            }
258
            auto element = writes[0]->element();
×
259
            if (!dynamic_cast<const data_flow::AccessNode*>(element)) {
×
260
                return false;
×
261
            }
262
            auto access_node = static_cast<const data_flow::AccessNode*>(element);
×
263
            auto& graph = access_node->get_parent();
×
264
            auto& edge = *graph.in_edges(*access_node).begin();
×
265
            auto& tasklet = static_cast<const data_flow::Tasklet&>(edge.src());
×
266
            if (tasklet.is_conditional()) {
×
267
                return false;
×
268
            }
269

270
            auto reads = users.reads(container);
×
271
            if (reads.size() == 0) {
×
272
                continue;
×
273
            }
274
            if (reads.size() > 1) {
×
275
                return false;
×
276
            }
277

278
            if (tasklet.code() != data_flow::TaskletCode::add &&
×
279
                tasklet.code() != data_flow::TaskletCode::min &&
×
280
                tasklet.code() != data_flow::TaskletCode::max &&
×
281
                tasklet.code() != data_flow::TaskletCode::fma &&
×
282
                tasklet.code() != data_flow::TaskletCode::assign) {
×
283
                return false;
×
284
            }
285
            if (tasklet.code() == data_flow::TaskletCode::fma) {
×
286
                // Must be the additive part
287
                for (auto& iedge : graph.in_edges(tasklet)) {
×
288
                    if (iedge.dst_conn() == tasklet.input(2).first) {
×
289
                        auto& read_node = dynamic_cast<const data_flow::AccessNode&>(iedge.src());
×
290
                        if (read_node.data() != container) {
×
291
                            return false;
×
292
                        }
293
                    }
×
294
                }
295
            } else {
×
296
                bool found_input = false;
×
297
                for (auto& iedge : graph.in_edges(tasklet)) {
×
298
                    auto& read_node = dynamic_cast<const data_flow::AccessNode&>(iedge.src());
×
299
                    if (read_node.data() == container) {
×
300
                        found_input = true;
×
301
                        break;
×
302
                    }
303
                }
304
                if (!found_input) {
×
305
                    return false;
×
306
                }
307
            }
308
        }
×
309
    }
310
    if (bit_width != 8 && bit_width != 16 && bit_width != 32 && bit_width != 64) {
×
311
        return false;
×
312
    }
313

314
    return true;
×
315
};
×
316

317
void Vectorization::apply(Schedule& schedule) {
×
UNCOV
318
    schedule.loop_schedule(&this->loop_, LoopSchedule::VECTORIZATION);
×
319
};
×
320

321
AccessType Vectorization::classify_access(const data_flow::Subset& subset,
×
322
                                          const symbolic::Symbol& indvar,
323
                                          const std::unordered_set<std::string>& moving_symbols) {
324
    // Leading dimensions must be constant
325
    for (size_t i = 0; i < subset.size() - 1; i++) {
×
326
        auto& dim = subset[i];
×
327
        if (symbolic::uses(dim, indvar->get_name())) {
×
328
            return AccessType::UNKNOWN;
×
329
        }
330
        for (auto& scalar : moving_symbols) {
×
331
            if (symbolic::uses(dim, scalar)) {
×
332
                return AccessType::UNKNOWN;
×
333
            }
334
        }
335
    }
×
336

337
    // Three supported cases: Contiguous, Indirection, Constant
338
    auto& last_dim = subset.back();
×
339

340
    // Case: INDIRECTION
341
    symbolic::Symbol gather_variable = SymEngine::null;
×
342
    for (auto& scalar : moving_symbols) {
×
343
        if (symbolic::uses(last_dim, scalar)) {
×
344
            assert(gather_variable == SymEngine::null);
×
345
            gather_variable = symbolic::symbol(scalar);
×
346
        }
×
347
    }
348
    if (gather_variable != SymEngine::null) {
×
349
        return AccessType::INDIRECTION;
×
350
    }
351

352
    // Case: Contiguous
353
    if (symbolic::uses(last_dim, indvar->get_name())) {
×
354
        auto match = symbolic::affine(last_dim, indvar);
×
355
        if (match.first == SymEngine::null) {
×
356
            return AccessType::UNKNOWN;
×
357
        }
358
        if (!symbolic::eq(match.first, symbolic::integer(1))) {
×
359
            return AccessType::UNKNOWN;
×
360
        }
361
        return AccessType::CONTIGUOUS;
×
362
    }
×
363

364
    // Case: Constant
365
    return AccessType::CONSTANT;
×
366
};
×
367

368
}  // namespace transformations
369
}  // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc