• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28106147644

24 Jun 2026 02:32PM UTC coverage: 61.922% (+0.1%) from 61.779%
28106147644

Pull #806

github

web-flow
Merge 2be414d54 into 57cc1db99
Pull Request #806: Map Collapse for Multiple targets in a neste sequence

165 of 185 new or added lines in 2 files covered. (89.19%)

419 existing lines in 30 files now uncovered.

37705 of 60891 relevant lines covered (61.92%)

1004.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.67
/opt/src/transformations/map_collapse.cpp
1
#include "sdfg/transformations/map_collapse.h"
2

3
#include <string>
4
#include <unordered_set>
5
#include <vector>
6

7
#include "sdfg/analysis/users.h"
8
#include "sdfg/builder/structured_sdfg_builder.h"
9
#include "sdfg/structured_control_flow/if_else.h"
10
#include "sdfg/structured_control_flow/map.h"
11
#include "sdfg/structured_control_flow/sequence.h"
12
#include "sdfg/structured_control_flow/structured_loop.h"
13
#include "sdfg/symbolic/symbolic.h"
14

15
namespace sdfg {
16
namespace transformations {
17

18
MapCollapse::MapCollapse(structured_control_flow::Map& loop, size_t count) : loop_(loop), count_(count) {}
86✔
19

20
std::string MapCollapse::name() const { return "MapCollapse"; }
2✔
21

22
bool MapCollapse::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
74✔
23
    // Criterion: count must be at least 2
24
    if (count_ < 2) {
74✔
25
        return false;
1✔
26
    }
1✔
27

28
    // Fast path: a chain of perfectly-nested maps.
29
    if (this->check_perfect_nest()) {
73✔
30
        return true;
32✔
31
    }
32✔
32

33
    // Imperfect (CUDA-style) collapse is performed one level at a time.
34
    if (count_ == 2 && this->check_imperfect(analysis_manager)) {
41✔
35
        return true;
14✔
36
    }
14✔
37

38
    return false;
27✔
39
}
41✔
40

41
bool MapCollapse::check_perfect_nest() {
106✔
42
    // Collect the chain of count_ perfectly-nested maps
43
    std::vector<structured_control_flow::Map*> maps;
106✔
44
    maps.push_back(&loop_);
106✔
45

46
    auto* current = &loop_;
106✔
47
    for (size_t i = 1; i < count_; ++i) {
183✔
48
        auto& body = current->root();
124✔
49

50
        // Criterion: All target maps must be perfectly nested (exactly one child that is a Map)
51
        if (body.size() != 1) {
124✔
52
            return false;
28✔
53
        }
28✔
54

55
        auto* next = dynamic_cast<structured_control_flow::Map*>(&body.at(0).first);
96✔
56
        if (!next) {
96✔
57
            return false;
18✔
58
        }
18✔
59

60
        // Criterion: The Sequence holding each map must have empty transitions
61
        if (!body.at(0).second.empty()) {
78✔
62
            return false;
1✔
63
        }
1✔
64

65
        maps.push_back(next);
77✔
66
        current = next;
77✔
67
    }
77✔
68

69
    // Criterion: All maps must be contiguous (stride 1)
70
    for (auto* map : maps) {
135✔
71
        if (!map->is_contiguous()) {
135✔
72
            return false;
×
73
        }
×
74
    }
135✔
75

76
    // Collect indvars of all maps being collapsed
77
    symbolic::SymbolSet indvars;
59✔
78
    for (auto* map : maps) {
135✔
79
        indvars.insert(map->indvar());
135✔
80
    }
135✔
81

82
    // Criterion: Map inits may not depend on any of the loop induction variables
83
    for (auto* map : maps) {
135✔
84
        auto init = map->init();
135✔
85
        for (auto& iv : indvars) {
326✔
86
            if (symbolic::uses(init, iv)) {
326✔
87
                return false;
1✔
88
            }
1✔
89
        }
326✔
90
    }
135✔
91

92
    // Criterion: Map bounds may not depend on any of the loop induction variables
93
    // of the maps being collapsed
94
    for (auto* map : maps) {
133✔
95
        auto bound = map->canonical_bound();
133✔
96
        if (bound.is_null()) {
133✔
97
            // If we can't even compute a closed-form bound, be conservative and disallow collapsing
98
            return false;
×
99
        }
×
100
        for (auto& iv : indvars) {
321✔
101
            if (symbolic::uses(bound, iv)) {
321✔
102
                return false;
2✔
103
            }
2✔
104
        }
321✔
105
    }
133✔
106

107
    return true;
56✔
108
}
58✔
109

110
bool MapCollapse::is_collapsible_inner_map(structured_control_flow::Map& map, const symbolic::Symbol& outer_indvar) {
44✔
111
    // Must increment by exactly 1 to participate in the flattened iteration space
112
    if (!map.is_contiguous()) {
44✔
113
        return false;
1✔
114
    }
1✔
115

116
    // Init may not depend on the outer induction variable
117
    if (symbolic::uses(map.init(), outer_indvar)) {
43✔
118
        return false;
1✔
119
    }
1✔
120

121
    // A closed-form bound is required and may not depend on the outer indvar
122
    auto bound = map.canonical_bound();
42✔
123
    if (bound.is_null()) {
42✔
NEW
124
        return false;
×
NEW
125
    }
×
126
    if (symbolic::uses(bound, outer_indvar)) {
42✔
127
        return false;
2✔
128
    }
2✔
129

130
    return true;
40✔
131
}
42✔
132

133
bool MapCollapse::check_imperfect(analysis::AnalysisManager& analysis_manager) {
39✔
134
    // The outer map must itself be collapsible (contiguous, closed-form bound
135
    // that does not depend on its own induction variable).
136
    auto outer_indvar = loop_.indvar();
39✔
137
    if (!loop_.is_contiguous()) {
39✔
138
        return false;
1✔
139
    }
1✔
140
    if (symbolic::uses(loop_.init(), outer_indvar)) {
38✔
NEW
141
        return false;
×
NEW
142
    }
×
143
    auto outer_bound = loop_.canonical_bound();
38✔
144
    if (outer_bound.is_null()) {
38✔
NEW
145
        return false;
×
NEW
146
    }
×
147
    if (symbolic::uses(outer_bound, outer_indvar)) {
38✔
NEW
148
        return false;
×
NEW
149
    }
×
150

151
    auto& body = loop_.root();
38✔
152
    const size_t n = body.size();
38✔
153

154
    std::vector<bool> is_collapsible(n, false);
38✔
155
    size_t num_collapsible = 0;
38✔
156
    for (size_t idx = 0; idx < n; ++idx) {
94✔
157
        // Criterion (v1): transitions between body elements must be empty. Any
158
        // inter-element assignments would need to be re-guarded, which is not
159
        // handled yet.
160
        if (!body.at(idx).second.empty()) {
57✔
161
            return false;
1✔
162
        }
1✔
163

164
        auto* map = dynamic_cast<structured_control_flow::Map*>(&body.at(idx).first);
56✔
165
        if (map != nullptr && this->is_collapsible_inner_map(*map, outer_indvar)) {
56✔
166
            is_collapsible[idx] = true;
24✔
167
            ++num_collapsible;
24✔
168
        }
24✔
169
        // Everything else (blocks, if-else, loops, non-collapsible maps) is a
170
        // "skipped" element that will be replicated on every inner thread.
171
    }
56✔
172

173
    // Need at least one collapsible inner map to flatten against.
174
    if (num_collapsible < 1) {
37✔
175
        return false;
21✔
176
    }
21✔
177

178
    // Data-dependency safety gate (replication model).
179
    //
180
    // The collapse flattens the outer map together with one inner level into a
181
    // single parallel iteration space. Collapsible inner maps run for the valid
182
    // portion of the flattened inner index (`inner < bound`); every other
183
    // ("skipped") body element is *replicated* on every inner thread.
184
    //
185
    // Replication is safe for a skipped element because it is a sibling of the
186
    // inner maps and therefore cannot reference the inner induction variable: its
187
    // reads and writes are identical for all inner threads of the same outer
188
    // iteration. Hence a value produced by a skipped element and consumed by a
189
    // later element (RAW) is reproduced independently on each thread - there is no
190
    // cross-thread dependency.
191
    //
192
    // What replication cannot make safe, and is therefore rejected:
193
    //   * A container written by a *collapsible* map and accessed (read or
194
    //     written) by any other body element: collapsible writes vary across the
195
    //     inner index, so the consumer would observe another thread's data
196
    //     without synchronization (covers RAW, WAR, WAW against collapsible maps).
197
    //   * A write-write conflict between two different body elements on the same
198
    //     container: ordering across threads is no longer guaranteed.
199
    auto& users = analysis_manager.get<analysis::Users>();
16✔
200

201
    std::vector<std::unordered_set<std::string>> writes(n);
16✔
202
    std::vector<std::unordered_set<std::string>> reads(n);
16✔
203
    for (size_t idx = 0; idx < n; ++idx) {
50✔
204
        analysis::UsersView view(users, body.at(idx).first);
34✔
205
        for (auto* u : view.writes()) {
58✔
206
            writes[idx].insert(u->container());
58✔
207
        }
58✔
208
        for (auto* u : view.moves()) {
34✔
NEW
209
            writes[idx].insert(u->container());
×
NEW
210
        }
×
211
        // Views alias memory; treat conservatively as both a read and a write.
212
        for (auto* u : view.views()) {
34✔
NEW
213
            writes[idx].insert(u->container());
×
NEW
214
            reads[idx].insert(u->container());
×
NEW
215
        }
×
216
        for (auto* u : view.reads()) {
107✔
217
            reads[idx].insert(u->container());
107✔
218
        }
107✔
219
    }
34✔
220

221
    for (size_t a = 0; a < n; ++a) {
45✔
222
        for (size_t b = 0; b < n; ++b) {
95✔
223
            if (a == b) {
66✔
224
                continue;
31✔
225
            }
31✔
226
            for (const auto& container : writes[a]) {
35✔
227
                const bool accessed_by_b = writes[b].count(container) != 0 || reads[b].count(container) != 0;
32✔
228
                if (!accessed_by_b) {
32✔
229
                    continue;
28✔
230
                }
28✔
231
                // Unsafe if the writer is a collapsible map (inner-varying write
232
                // shared with another element), or if both elements write the same
233
                // container (write-write conflict). A skipped writer feeding a
234
                // pure reader (RAW) is safe under replication and allowed.
235
                if (is_collapsible[a] || writes[b].count(container) != 0) {
4✔
236
                    return false;
2✔
237
                }
2✔
238
            }
4✔
239
        }
35✔
240
    }
31✔
241

242
    return true;
14✔
243
}
16✔
244

245
void MapCollapse::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
33✔
246
    if (this->check_perfect_nest()) {
33✔
247
        this->apply_perfect(builder, analysis_manager);
24✔
248
    } else {
24✔
249
        this->apply_imperfect(builder, analysis_manager);
9✔
250
    }
9✔
251
}
33✔
252

253
void MapCollapse::apply_perfect(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
24✔
254
    auto& sdfg = builder.subject();
24✔
255

256
    // Step 1: Gather the maps to collapse and their bounds
257
    std::vector<structured_control_flow::Map*> maps;
24✔
258
    maps.push_back(&loop_);
24✔
259
    auto* current = &loop_;
24✔
260
    for (size_t i = 1; i < count_; ++i) {
54✔
261
        auto* next = dynamic_cast<structured_control_flow::Map*>(&current->root().at(0).first);
30✔
262
        maps.push_back(next);
30✔
263
        current = next;
30✔
264
    }
30✔
265

266
    std::vector<symbolic::Symbol> indvars;
24✔
267
    std::vector<symbolic::Expression> bounds;
24✔
268
    for (auto* map : maps) {
54✔
269
        indvars.push_back(map->indvar());
54✔
270
        bounds.push_back(map->canonical_bound());
54✔
271
    }
54✔
272

273
    // Step 2: Compute total iteration count = product of all bounds
274
    symbolic::Expression total_bound = bounds[0];
24✔
275
    for (size_t i = 1; i < bounds.size(); ++i) {
54✔
276
        total_bound = symbolic::mul(total_bound, bounds[i]);
30✔
277
    }
30✔
278

279
    // Step 3: Create the collapsed induction variable
280
    auto civ_name = builder.find_new_name(indvars[0]->get_name() + "_collapsed");
24✔
281
    builder.add_container(civ_name, sdfg.type(indvars[0]->get_name()));
24✔
282
    auto civ = symbolic::symbol(civ_name);
24✔
283

284
    // Step 4: Find the parent sequence of the outermost map
285
    auto parent = static_cast<structured_control_flow::Sequence*>(loop_.get_parent());
24✔
286
    size_t index = parent->index(loop_);
24✔
287
    auto& transition = parent->at(index).second;
24✔
288

289
    // Step 5: Create the new collapsed map before the original
290
    auto& collapsed_map = builder.add_map_before(
24✔
291
        *parent,
24✔
292
        loop_,
24✔
293
        civ,
24✔
294
        symbolic::Lt(civ, total_bound),
24✔
295
        symbolic::integer(0),
24✔
296
        symbolic::add(civ, symbolic::integer(1)),
24✔
297
        loop_.schedule_type(),
24✔
298
        transition.assignments(),
24✔
299
        loop_.debug_info()
24✔
300
    );
24✔
301

302
    // Step 6: Add an empty block for indvar recovery before the original contents
303
    builder.add_block(collapsed_map.root());
24✔
304

305
    // Step 7: Move the body of the innermost map into the collapsed map
306
    auto* innermost = maps.back();
24✔
307
    builder.move_children(innermost->root(), collapsed_map.root());
24✔
308

309
    // Step 8: Add indvar recovery assignments to the transition of the empty
310
    // block so that all induction variables are defined before the original
311
    // loop contents.
312
    //
313
    // For maps [0..n-1] with bounds [B0, B1, ..., B_{n-1}]:
314
    //   indvar_0     = civ / (B1 * B2 * ... * B_{n-1})
315
    //   indvar_k     = (civ / (B_{k+1} * ... * B_{n-1})) % B_k
316
    //   indvar_{n-1} = civ % B_{n-1}
317
    auto& first_transition = collapsed_map.root().at(0).second;
24✔
318
    size_t n = indvars.size();
24✔
319

320
    for (size_t k = 0; k < n; ++k) {
78✔
321
        // Compute suffix product = B_{k+1} * ... * B_{n-1}
322
        symbolic::Expression suffix = symbolic::integer(1);
54✔
323
        for (size_t j = k + 1; j < n; ++j) {
91✔
324
            suffix = symbolic::mul(suffix, bounds[j]);
37✔
325
        }
37✔
326

327
        symbolic::Expression value;
54✔
328
        if (k == 0 && n == 1) {
54✔
329
            // Single-loop degenerate case (shouldn't happen with count>=2, but safe)
330
            value = civ;
×
331
        } else if (k == 0) {
54✔
332
            // Outermost: indvar_0 = civ / suffix
333
            value = symbolic::div(civ, suffix);
24✔
334
        } else if (k == n - 1) {
30✔
335
            // Innermost: indvar_{n-1} = civ % B_{n-1}
336
            value = symbolic::mod(civ, bounds[k]);
24✔
337
        } else {
24✔
338
            // Middle: indvar_k = (civ / suffix) % B_k
339
            value = symbolic::mod(symbolic::div(civ, suffix), bounds[k]);
6✔
340
        }
6✔
341

342
        first_transition.assignments()[indvars[k]] = value;
54✔
343
    }
54✔
344

345
    // Step 9: Remove the original nest
346
    // The index shifted by 1 because we inserted a map before
347
    transition.assignments().clear();
24✔
348
    builder.remove_child(*parent, parent->index(loop_));
24✔
349

350
    applied_ = true;
24✔
351
    collapsed_loop_ = &collapsed_map;
24✔
352
}
24✔
353

354
void MapCollapse::apply_imperfect(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
9✔
355
    auto& sdfg = builder.subject();
9✔
356
    auto& outer = loop_;
9✔
357
    auto outer_indvar = outer.indvar();
9✔
358
    auto& body = outer.root();
9✔
359

360
    // Step 1: Classify the outer body children (in order) and gather the bounds
361
    // of the collapsible inner maps.
362
    struct BodyItem {
9✔
363
        structured_control_flow::ControlFlowNode* node;
9✔
364
        structured_control_flow::Map* map; // non-null only if collapsible
9✔
365
        symbolic::Expression bound; // valid only if collapsible
9✔
366
    };
9✔
367

368
    std::vector<BodyItem> items;
9✔
369
    std::vector<symbolic::Expression> collapsible_bounds;
9✔
370
    for (size_t idx = 0; idx < body.size(); ++idx) {
28✔
371
        auto& child = body.at(idx).first;
19✔
372
        auto* map = dynamic_cast<structured_control_flow::Map*>(&child);
19✔
373
        if (map != nullptr && this->is_collapsible_inner_map(*map, outer_indvar)) {
19✔
374
            auto bound = map->canonical_bound();
16✔
375
            items.push_back({&child, map, bound});
16✔
376
            collapsible_bounds.push_back(bound);
16✔
377
        } else {
16✔
378
            items.push_back({&child, nullptr, SymEngine::null});
3✔
379
        }
3✔
380
    }
19✔
381

382
    // Step 2: Virtual inner extent = max of all collapsible bounds.
383
    symbolic::Expression inner_extent = collapsible_bounds[0];
9✔
384
    for (size_t i = 1; i < collapsible_bounds.size(); ++i) {
16✔
385
        inner_extent = symbolic::max(inner_extent, collapsible_bounds[i]);
7✔
386
    }
7✔
387

388
    auto outer_bound = outer.canonical_bound();
9✔
389
    auto total_bound = symbolic::mul(outer_bound, inner_extent);
9✔
390

391
    // Step 3: Create the collapsed induction variable and the virtual inner index.
392
    auto civ_name = builder.find_new_name(outer_indvar->get_name() + "_collapsed");
9✔
393
    builder.add_container(civ_name, sdfg.type(outer_indvar->get_name()));
9✔
394
    auto civ = symbolic::symbol(civ_name);
9✔
395

396
    auto inner_name = builder.find_new_name(outer_indvar->get_name() + "_inner");
9✔
397
    builder.add_container(inner_name, sdfg.type(outer_indvar->get_name()));
9✔
398
    auto inner_index = symbolic::symbol(inner_name);
9✔
399

400
    // Step 4: Find the parent sequence of the outer map.
401
    auto parent = static_cast<structured_control_flow::Sequence*>(outer.get_parent());
9✔
402
    size_t index = parent->index(outer);
9✔
403
    auto& transition = parent->at(index).second;
9✔
404

405
    // Step 5: Create the collapsed map before the original outer map.
406
    auto& collapsed_map = builder.add_map_before(
9✔
407
        *parent,
9✔
408
        outer,
9✔
409
        civ,
9✔
410
        symbolic::Lt(civ, total_bound),
9✔
411
        symbolic::integer(0),
9✔
412
        symbolic::add(civ, symbolic::integer(1)),
9✔
413
        outer.schedule_type(),
9✔
414
        transition.assignments(),
9✔
415
        outer.debug_info()
9✔
416
    );
9✔
417

418
    // Step 6: Recovery block defining the outer index and the virtual inner index:
419
    //   outer_indvar = civ / inner_extent
420
    //   inner_index  = civ % inner_extent
421
    builder.add_block(collapsed_map.root());
9✔
422
    auto& recovery_transition = collapsed_map.root().at(0).second;
9✔
423
    recovery_transition.assignments()[outer_indvar] = symbolic::div(civ, inner_extent);
9✔
424
    recovery_transition.assignments()[inner_index] = symbolic::mod(civ, inner_extent);
9✔
425

426
    // Step 7: Move the outer body into the collapsed map (after the recovery block),
427
    // preserving order.
428
    builder.move_children(outer.root(), collapsed_map.root());
9✔
429

430
    // Step 8: Process each original body element. Collapsible maps run for the
431
    // valid portion of the inner extent (`inner_index < bound`). Every other
432
    // ("skipped") element is replicated: it stays a direct child of the collapsed
433
    // body and therefore runs on every inner thread. Because a skipped element is
434
    // a sibling of the inner maps it cannot reference the inner index, so all
435
    // inner threads of an outer iteration execute it identically.
436
    for (auto& item : items) {
19✔
437
        auto* child = item.node;
19✔
438
        if (item.map != nullptr) {
19✔
439
            auto inner_iv = item.map->indvar();
16✔
440

441
            auto& if_else = builder.add_if_else_before(collapsed_map.root(), *child, {}, outer.debug_info());
16✔
442
            auto& branch = builder.add_case(if_else, symbolic::Lt(inner_index, item.bound), outer.debug_info());
16✔
443

444
            // Recover the inner induction variable from the virtual index.
445
            builder.add_block(branch);
16✔
446
            branch.at(0).second.assignments()[inner_iv] = inner_index;
16✔
447

448
            // Move the map body into the guarded branch and drop the empty map shell.
449
            builder.move_children(item.map->root(), branch);
16✔
450
            builder.remove_child(collapsed_map.root(), collapsed_map.root().index(*child));
16✔
451
        }
16✔
452
        // Skipped elements are left in place (replicated on every inner thread).
453
    }
19✔
454

455
    // Step 9: Remove the original outer map.
456
    transition.assignments().clear();
9✔
457
    builder.remove_child(*parent, parent->index(outer));
9✔
458

459
    applied_ = true;
9✔
460
    collapsed_loop_ = &collapsed_map;
9✔
461

462
    analysis_manager.invalidate_all();
9✔
463
}
9✔
464

465
void MapCollapse::to_json(nlohmann::json& j) const {
1✔
466
    j["transformation_type"] = this->name();
1✔
467
    j["parameters"] = nlohmann::json::object();
1✔
468
    j["parameters"] = {{"count", count_}};
1✔
469

470
    serializer::JSONSerializer ser_flat(false);
1✔
471
    j["subgraph"] = nlohmann::json::object();
1✔
472
    j["subgraph"]["0"] = nlohmann::json::object();
1✔
473
    ser_flat.serialize_node(j["subgraph"]["0"], loop_);
1✔
474
}
1✔
475

476
MapCollapse MapCollapse::from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& desc) {
1✔
477
    auto loop_id = desc["subgraph"]["0"]["element_id"].get<size_t>();
1✔
478
    size_t count = desc["parameters"]["count"].get<size_t>();
1✔
479
    auto element = builder.find_element_by_id(loop_id);
1✔
480
    if (!element) {
1✔
UNCOV
481
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
×
UNCOV
482
    }
×
483
    auto loop = dynamic_cast<structured_control_flow::Map*>(element);
1✔
484

485
    return MapCollapse(*loop, count);
1✔
486
}
1✔
487

488
structured_control_flow::Map* MapCollapse::collapsed_loop() {
34✔
489
    if (!applied_) {
34✔
490
        return &loop_;
1✔
491
    }
1✔
492

493
    return collapsed_loop_;
33✔
494
}
34✔
495

496
} // namespace transformations
497
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc