• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / sdfglib / 15044057891

15 May 2025 11:42AM UTC coverage: 59.37% (+1.8%) from 57.525%
15044057891

push

github

web-flow
Merge pull request #14 from daisytuner/sanitizers

enables sanitizer on unit tests

63 of 67 new or added lines in 47 files covered. (94.03%)

570 existing lines in 62 files now uncovered.

7356 of 12390 relevant lines covered (59.37%)

505.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.15
/src/passes/schedule/stride_minimization.cpp
1
#include "sdfg/passes/schedule/stride_minimization.h"
2

3
#include "sdfg/analysis/assumptions_analysis.h"
4
#include "sdfg/analysis/data_parallelism_analysis.h"
5
#include "sdfg/analysis/users.h"
6
#include "sdfg/transformations/loop_interchange.h"
7

8
namespace sdfg {
9
namespace passes {
10

11
StrideMinimization::StrideMinimization()
16✔
12
    : Pass(){
16✔
13

14
      };
16✔
15

16
bool StrideMinimization::is_admissible(std::vector<std::string>& current,
239✔
17
                                       std::vector<std::string>& target,
18
                                       std::unordered_set<std::string>& allowed_swaps) {
19
    std::vector<size_t> permutation_indices;
239✔
20
    for (auto& p : current) {
932✔
21
        auto it = std::find(target.begin(), target.end(), p);
693✔
22
        permutation_indices.push_back(std::distance(target.begin(), it));
693✔
23
    }
24
    for (size_t i = 0; i < permutation_indices.size(); i++) {
507✔
25
        for (size_t j = 0; j < permutation_indices.size() - i - 1; j++) {
692✔
26
            if (permutation_indices[j] > permutation_indices[j + 1]) {
424✔
27
                std::string swap =
28
                    target[permutation_indices[j]] + "_" + target[permutation_indices[j + 1]];
213✔
29
                if (allowed_swaps.find(swap) == allowed_swaps.end()) {
213✔
30
                    return false;
140✔
31
                }
32
                size_t tmp = permutation_indices[j];
73✔
33
                permutation_indices[j] = permutation_indices[j + 1];
73✔
34
                permutation_indices[j + 1] = tmp;
73✔
35
            }
213✔
36
        }
284✔
37
    }
268✔
38

39
    return true;
99✔
40
};
239✔
41

42
std::unordered_set<std::string> StrideMinimization::allowed_swaps(
57✔
43
    Schedule& schedule, std::vector<structured_control_flow::ControlFlowNode*>& nested_loops) {
44
    auto& builder = schedule.builder();
57✔
45

46
    std::unordered_set<std::string> allowed_swaps;
57✔
47
    for (size_t i = 0; i < nested_loops.size() - 1; i++) {
137✔
48
        if (dynamic_cast<structured_control_flow::While*>(nested_loops.at(i)) ||
160✔
49
            dynamic_cast<structured_control_flow::While*>(nested_loops.at(i + 1))) {
80✔
50
            continue;
×
51
        } else {
52
            auto first_loop = dynamic_cast<structured_control_flow::For*>(nested_loops.at(i));
80✔
53
            auto second_loop = dynamic_cast<structured_control_flow::For*>(nested_loops.at(i + 1));
80✔
54
            auto& parent = builder.parent(*first_loop);
80✔
55
            transformations::LoopInterchange loop_interchange(parent, *first_loop, *second_loop);
80✔
56
            if (loop_interchange.can_be_applied(schedule)) {
80✔
57
                allowed_swaps.insert(first_loop->indvar()->get_name() + "_" +
80✔
58
                                     second_loop->indvar()->get_name());
40✔
59
            }
40✔
60
        }
61
    }
80✔
62
    return allowed_swaps;
57✔
63
};
57✔
64

65
std::pair<bool, std::vector<std::string>> StrideMinimization::can_be_applied(
68✔
66
    Schedule& schedule, std::vector<structured_control_flow::ControlFlowNode*>& nested_loops) {
67
    if (nested_loops.size() < 2) {
68✔
68
        return {false, {}};
11✔
69
    }
70
    auto& builder = schedule.builder();
57✔
71
    auto& sdfg = builder.subject();
57✔
72

73
    size_t while_loops = 0;
57✔
74
    std::vector<std::string> permutation;
57✔
75
    for (auto& loop : nested_loops) {
194✔
76
        if (!dynamic_cast<structured_control_flow::For*>(loop)) {
137✔
77
            permutation.push_back("WHILE_" + std::to_string(while_loops++));
×
UNCOV
78
        } else {
×
79
            auto for_loop = dynamic_cast<structured_control_flow::For*>(loop);
137✔
80
            permutation.push_back(for_loop->indvar()->get_name());
137✔
81
        }
82
    }
83

84
    auto admissible_swaps = allowed_swaps(schedule, nested_loops);
57✔
85

86
    // Collect all memory accesses of body
87
    structured_control_flow::ControlFlowNode* nested_root = nullptr;
57✔
88
    if (auto for_loop = dynamic_cast<structured_control_flow::For*>(nested_loops.at(0))) {
57✔
89
        nested_root = &for_loop->root();
57✔
90
    } else if (auto while_loop =
57✔
91
                   dynamic_cast<structured_control_flow::While*>(nested_loops.at(0))) {
×
92
        nested_root = &while_loop->root();
×
UNCOV
93
    } else {
×
94
        assert(false);
×
95
    }
96
    auto& analysis_manager = schedule.analysis_manager();
57✔
97
    auto& all_users = analysis_manager.get<analysis::Users>();
57✔
98
    analysis::UsersView users(all_users, *nested_root);
57✔
99

100
    // Collect all memory accesses
101
    std::vector<data_flow::Subset> subsets;
57✔
102
    for (auto& use : users.reads()) {
1,365✔
103
        auto element = use->element();
1,308✔
104
        if (!dynamic_cast<data_flow::AccessNode*>(element)) {
1,308✔
105
            continue;
1,020✔
106
        }
107
        auto access = dynamic_cast<data_flow::AccessNode*>(element);
288✔
108
        auto& graph = access->get_parent();
288✔
109
        for (auto& oedge : graph.out_edges(*access)) {
608✔
110
            auto& subset = oedge.subset();
320✔
111

112
            // Filter the subset for only dimensions using the permutation
113
            data_flow::Subset filtered_subset;
320✔
114
            for (auto& dim : subset) {
818✔
115
                for (auto& perm : permutation) {
1,202✔
116
                    if (symbolic::uses(dim, perm)) {
1,011✔
117
                        filtered_subset.push_back(dim);
307✔
118
                        break;
307✔
119
                    }
120
                }
121
            }
122
            if (filtered_subset.empty()) {
320✔
123
                continue;
123✔
124
            }
125

126
            subsets.push_back(filtered_subset);
197✔
127
        }
320✔
128
    }
129
    for (auto& use : users.writes()) {
474✔
130
        auto element = use->element();
417✔
131
        if (!dynamic_cast<data_flow::AccessNode*>(element)) {
417✔
132
            continue;
252✔
133
        }
134
        auto access = dynamic_cast<data_flow::AccessNode*>(element);
165✔
135
        auto& graph = access->get_parent();
165✔
136
        for (auto& iedge : graph.in_edges(*access)) {
330✔
137
            auto& subset = iedge.subset();
165✔
138

139
            // Filter the subset for only dimensions using the permutation
140
            data_flow::Subset filtered_subset;
165✔
141
            for (auto& dim : subset) {
404✔
142
                for (auto& perm : permutation) {
600✔
143
                    if (symbolic::uses(dim, perm)) {
489✔
144
                        filtered_subset.push_back(dim);
128✔
145
                        break;
128✔
146
                    }
147
                }
148
            }
149
            if (filtered_subset.empty()) {
165✔
150
                continue;
85✔
151
            }
152

153
            subsets.push_back(filtered_subset);
80✔
154
        }
165✔
155
    }
156

157
    // Test all permutations (must start from a sorted sequence)
158
    std::vector<std::string> current = permutation;
57✔
159
    std::sort(current.begin(), current.end());
57✔
160

161
    std::vector<std::string> best_permutation;
57✔
162
    std::vector<std::map<size_t, size_t>> best_scores;
57✔
163
    do {
57✔
164
        if (!is_admissible(permutation, current, admissible_swaps)) {
234✔
165
            continue;
137✔
166
        }
167

168
        // Init scores per dimension
169
        std::vector<std::map<size_t, size_t>> scores;
97✔
170
        for (auto& var : current) {
339✔
171
            scores.push_back({});
242✔
172
        }
173

174
        // Compute skipped dimensions and sum up
175
        for (auto& subset : subsets) {
589✔
176
            std::vector<size_t> strides;
492✔
177
            for (auto& var : current) {
1,735✔
178
                bool found = false;
1,243✔
179
                for (size_t j = 0; j < subset.size(); j++) {
2,176✔
180
                    auto& dim = subset.at(j);
1,744✔
181
                    if (symbolic::uses(dim, var)) {
1,744✔
182
                        strides.push_back(subset.size() - j);
811✔
183
                        found = true;
811✔
184
                        break;
811✔
185
                    }
186
                }
933✔
187
                if (!found) {
1,243✔
188
                    strides.push_back(0);
432✔
189
                }
432✔
190
            }
191

192
            for (size_t i = 0; i < current.size(); i++) {
1,735✔
193
                auto& score_dim = scores[i];
1,243✔
194
                if (score_dim.find(strides[i]) == score_dim.end()) {
1,243✔
195
                    score_dim[strides[i]] = 1;
534✔
196
                } else {
534✔
197
                    score_dim[strides[i]]++;
709✔
198
                }
199
            }
1,243✔
200
        }
492✔
201

202
        // Compare scores
203
        if (best_permutation.empty()) {
97✔
204
            best_permutation = current;
57✔
205
            best_scores = scores;
57✔
206
        } else {
57✔
207
            bool better = false;
40✔
208
            bool equal = true;
40✔
209
            for (int i = scores.size() - 1; i >= 0; i--) {
49✔
210
                auto& best_score_dim = best_scores[i];
46✔
211
                auto& score_dim = scores[i];
46✔
212

213
                // Decide by maximal stride
214
                auto best_max_stride = best_score_dim.rbegin()->first;
46✔
215
                auto max_stride = score_dim.rbegin()->first;
46✔
216
                if (max_stride < best_max_stride) {
46✔
217
                    better = true;
15✔
218
                    equal = false;
15✔
219
                    break;
15✔
220
                } else if (max_stride == best_max_stride) {
31✔
221
                    // Decide by number of occurences
222
                    auto best_max_stride_count = best_score_dim.rbegin()->second;
10✔
223
                    auto max_stride_count = score_dim.rbegin()->second;
10✔
224
                    if (max_stride_count < best_max_stride_count) {
10✔
225
                        better = true;
×
226
                        equal = false;
×
227
                        break;
×
228
                    } else if (max_stride_count == best_max_stride_count) {
10✔
229
                        continue;
9✔
230
                    } else {
231
                        equal = false;
1✔
232
                        break;
1✔
233
                    }
234
                } else {
235
                    equal = false;
21✔
236
                    break;
21✔
237
                }
238
            }
239
            if (better) {
40✔
240
                best_permutation = current;
15✔
241
                best_scores = scores;
15✔
242
            } else if (equal) {
40✔
243
                // If equal and original permutation
244
                if (std::equal(current.begin(), current.end(), permutation.begin())) {
3✔
245
                    best_permutation = current;
×
246
                    best_scores = scores;
×
UNCOV
247
                }
×
248
            }
3✔
249
        }
250
    } while (std::next_permutation(current.begin(), current.end()));
234✔
251

252
    // Check if permutation is better than original
253
    return {best_permutation != permutation, best_permutation};
57✔
254
};
68✔
255

256
void StrideMinimization::apply(Schedule& schedule,
9✔
257
                               std::vector<structured_control_flow::ControlFlowNode*>& nested_loops,
258
                               std::vector<std::string> target_permutation) {
259
    auto& builder = schedule.builder();
9✔
260

261
    size_t while_loops = 0;
9✔
262
    std::vector<std::string> permutation;
9✔
263
    for (auto& loop : nested_loops) {
35✔
264
        if (!dynamic_cast<structured_control_flow::For*>(loop)) {
26✔
265
            permutation.push_back("WHILE_" + std::to_string(while_loops++));
×
UNCOV
266
        } else {
×
267
            auto for_loop = dynamic_cast<structured_control_flow::For*>(loop);
26✔
268
            permutation.push_back(for_loop->indvar()->get_name());
26✔
269
        }
270
    }
271
    std::vector<size_t> permutation_indices;
9✔
272
    for (auto& p : permutation) {
35✔
273
        auto it = std::find(target_permutation.begin(), target_permutation.end(), p);
26✔
274
        permutation_indices.push_back(std::distance(target_permutation.begin(), it));
26✔
275
    }
276

277
    // Bubble sort permutation indices
278
    for (size_t i = 0; i < permutation_indices.size(); i++) {
35✔
279
        for (size_t j = 0; j < permutation_indices.size() - i - 1; j++) {
52✔
280
            if (permutation_indices[j] > permutation_indices[j + 1]) {
26✔
281
                auto first_loop = static_cast<structured_control_flow::For*>(nested_loops.at(j));
9✔
282
                auto second_loop =
9✔
283
                    static_cast<structured_control_flow::For*>(nested_loops.at(j + 1));
9✔
284
                auto& parent = builder.parent(*first_loop);
9✔
285
                transformations::LoopInterchange loop_interchange(parent, *first_loop,
18✔
286
                                                                  *second_loop);
9✔
287
                if (!loop_interchange.can_be_applied(schedule)) {
9✔
288
                    throw std::runtime_error("Loop interchange cannot be applied");
×
289
                }
290
                loop_interchange.apply(schedule);
9✔
291
                std::swap(permutation_indices[j], permutation_indices[j + 1]);
9✔
292
            }
9✔
293
        }
26✔
294
    }
26✔
295
};
9✔
296

297
std::vector<structured_control_flow::ControlFlowNode*> StrideMinimization::children(
138✔
298
    structured_control_flow::ControlFlowNode* node,
299
    const std::unordered_map<structured_control_flow::ControlFlowNode*,
300
                             structured_control_flow::ControlFlowNode*>& tree) const {
301
    // Find unique child
302
    std::vector<structured_control_flow::ControlFlowNode*> c;
138✔
303
    for (auto& entry : tree) {
956✔
304
        if (entry.second == node) {
818✔
305
            c.push_back(entry.first);
80✔
306
        }
80✔
307
    }
308
    return c;
138✔
309
};
138✔
310

311
std::list<std::vector<structured_control_flow::ControlFlowNode*>>
312
StrideMinimization::loop_tree_paths(
138✔
313
    structured_control_flow::ControlFlowNode* loop,
314
    const std::unordered_map<structured_control_flow::ControlFlowNode*,
315
                             structured_control_flow::ControlFlowNode*>& tree) const {
316
    // Collect all paths in tree starting from loop recursively (DFS)
317
    std::list<std::vector<structured_control_flow::ControlFlowNode*>> paths;
138✔
318
    auto children = this->children(loop, tree);
138✔
319
    if (children.empty()) {
138✔
320
        paths.push_back({loop});
70✔
321
        return paths;
70✔
322
    }
323

324
    for (auto& child : children) {
148✔
325
        auto p = this->loop_tree_paths(child, tree);
80✔
326
        for (auto& path : p) {
164✔
327
            path.insert(path.begin(), loop);
84✔
328
            paths.push_back(path);
84✔
329
        }
330
    }
80✔
331

332
    return paths;
68✔
333
};
138✔
334

335
std::string StrideMinimization::name() { return "StrideMinimization"; };
×
336

337
bool StrideMinimization::run_pass(Schedule& schedule) {
25✔
338
    bool applied = false;
25✔
339

340
    // Compute loop tree
341
    auto tree = schedule.loop_tree();
25✔
342

343
    // Collect outermost loops
344
    std::list<structured_control_flow::ControlFlowNode*> outer_loops;
25✔
345
    for (auto& node : tree) {
163✔
346
        if (node.second == nullptr) {
138✔
347
            outer_loops.push_back(node.first);
58✔
348
        }
58✔
349
    }
350

351
    // Apply stride minimization
352
    for (auto& outer_loop : outer_loops) {
83✔
353
        auto paths = this->loop_tree_paths(outer_loop, tree);
58✔
354
        for (auto& path : paths) {
117✔
355
            auto [applicable, permutation] = this->can_be_applied(schedule, path);
68✔
356
            if (applicable) {
68✔
357
                this->apply(schedule, path, permutation);
18✔
358
                applied = true;
9✔
359
                break;
9✔
360
            }
361
        }
68✔
362
    }
58✔
363

364
    return applied;
25✔
365
};
25✔
366

367
}  // namespace passes
368
}  // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc