• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28190635525

25 Jun 2026 06:08PM UTC coverage: 61.743% (+0.1%) from 61.644%
28190635525

push

github

web-flow
adds reduce as new structured loop type (#802)

* adds Reduce loop to structured loops

* extends For2Map into a general detection of reduce and map

* makes serialization of reduce backward compatible

* counts reduce as fors in loop analysis

* renames serializer for structured loops

* updates verifier

* adds support for FMA

* updates loop report to count map, reduce, for separately

* updates verifier after merge

* removes non-existent api function in tests after merge

* updates transformations to handle reduce

* adds vectorize dispatcher for reduce

* updates xfail for incorrect output

* updates go fast

* updates llvm verifier

* removes torchaudio dependency

705 of 985 new or added lines in 34 files covered. (71.57%)

5 existing lines in 4 files now uncovered.

38837 of 62901 relevant lines covered (61.74%)

978.17 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.29
/sdfg/src/analysis/loop_carried_dependency_analysis.cpp
1
#include "sdfg/analysis/loop_carried_dependency_analysis.h"
2

3
#include <cassert>
4
#include <map>
5
#include <optional>
6
#include <set>
7
#include <string>
8
#include <vector>
9

10
#include <isl/ctx.h>
11
#include <isl/options.h>
12
#include <isl/set.h>
13

14
#include "sdfg/analysis/analysis.h"
15
#include "sdfg/analysis/assumptions_analysis.h"
16
#include "sdfg/analysis/data_dependency_analysis.h"
17
#include "sdfg/analysis/loop_analysis.h"
18
#include "sdfg/analysis/memory_layout_analysis.h"
19
#include "sdfg/analysis/users.h"
20
#include "sdfg/data_flow/access_node.h"
21
#include "sdfg/data_flow/library_nodes/math/cmath/cmath_node.h"
22
#include "sdfg/data_flow/memlet.h"
23
#include "sdfg/data_flow/tasklet.h"
24
#include "sdfg/structured_control_flow/block.h"
25
#include "sdfg/structured_control_flow/if_else.h"
26
#include "sdfg/structured_control_flow/sequence.h"
27
#include "sdfg/structured_sdfg.h"
28
#include "sdfg/symbolic/maps.h"
29
#include "sdfg/symbolic/polyhedral.h"
30
#include "sdfg/symbolic/symbolic.h"
31
#include "sdfg/types/scalar.h"
32

33
namespace sdfg {
34
namespace analysis {
35

36
LoopCarriedDependencyAnalysis::LoopCarriedDependencyAnalysis(StructuredSDFG& sdfg)
37
    : Analysis(sdfg), node_(sdfg.root()) {}
178✔
38

39
LoopCarriedDependencyAnalysis::LoopCarriedDependencyAnalysis(StructuredSDFG& sdfg, structured_control_flow::Sequence& node)
40
    : Analysis(sdfg), node_(node) {}
×
41

42
DataDependencyAnalysis& LoopCarriedDependencyAnalysis::detailed_dda() {
178✔
43
    if (!detailed_dda_) {
178✔
44
        detailed_dda_ = std::make_unique<DataDependencyAnalysis>(this->sdfg_, this->node_);
178✔
45
        detailed_dda_->set_detailed(true);
178✔
46
    }
178✔
47
    return *detailed_dda_;
178✔
48
}
178✔
49

50
void LoopCarriedDependencyAnalysis::analyze_loop(
51
    analysis::AnalysisManager& /*analysis_manager*/, structured_control_flow::StructuredLoop& /*loop*/
52
) {
×
53
    // Per-loop work is done inline in `run()`.
54
}
×
55

56
namespace {
57

58
bool is_undefined_user(User& user) {
4,117✔
59
    // Mirror DDA::is_undefined_user — undefined users have a null vertex.
60
    return user.element() == nullptr;
4,117✔
61
}
4,117✔
62

63
// Collect a user's subsets, preferring delinearized (multi-dim) subsets from
64
// MemoryLayoutAnalysis when available. ISL cannot reason precisely about
65
// linearized expressions like `i*N + j`; delinearization recovers `[i, j]` so
66
// `dependence_deltas` can compute exact distance vectors.
67
//
68
// The returned vector is index-aligned with the user's underlying memlets:
69
// for each memlet, either MLA's delinearized subset (when MLA succeeded) or
70
// the memlet's original subset (linearized fallback).
71
std::vector<data_flow::Subset> collect_subsets(User& user, MemoryLayoutAnalysis& mla) {
4,112✔
72
    std::vector<data_flow::Subset> result;
4,112✔
73
    auto* access_node = dynamic_cast<data_flow::AccessNode*>(user.element());
4,112✔
74
    if (access_node == nullptr) {
4,112✔
75
        for (auto& s : user.subsets()) result.push_back(s);
×
76
        return result;
×
77
    }
×
78
    auto& graph = access_node->get_parent();
4,112✔
79
    if (user.use() == Use::READ || user.use() == Use::VIEW) {
4,112✔
80
        for (auto& edge : graph.out_edges(*access_node)) {
974✔
81
            if (auto* acc = mla.access(edge)) {
974✔
82
                result.push_back(acc->subset);
972✔
83
            } else {
972✔
84
                result.push_back(edge.subset());
2✔
85
            }
2✔
86
        }
974✔
87
    } else if (user.use() == Use::WRITE || user.use() == Use::MOVE) {
3,207✔
88
        for (auto& edge : graph.in_edges(*access_node)) {
3,207✔
89
            if (auto* acc = mla.access(edge)) {
3,207✔
90
                result.push_back(acc->subset);
3,207✔
91
            } else {
3,207✔
92
                result.push_back(edge.subset());
×
93
            }
×
94
        }
3,207✔
95
    }
3,207✔
96
    return result;
4,112✔
97
}
4,112✔
98

99
// Compute the union of iteration-distance delta sets between two users wrt
100
// `loop`'s indvar. Inner-loop indvars are marked non-constant so that the ISL
101
// formulation existentially quantifies them per-side (giving "different inner
102
// iteration" the freedom it needs).
103
//
104
// Returns {empty=true} when no loop-carried dependence exists between the two
105
// users. Returns {empty=false, deltas_str=""} for the scalar shortcut and for
106
// undefined users (dependence exists, distance unrepresentable).
107
symbolic::maps::DependenceDeltas pair_deltas(
108
    StructuredSDFG& sdfg,
109
    User& previous,
110
    User& current,
111
    analysis::AnalysisManager& analysis_manager,
112
    AssumptionsAnalysis& assumptions_analysis,
113
    structured_control_flow::StructuredLoop& loop
114
) {
5,574✔
115
    symbolic::maps::DependenceDeltas empty_result{true, "", {}};
5,574✔
116

117
    if (previous.container() != current.container()) {
5,574✔
118
        return empty_result;
×
119
    }
×
120

121
    // Scalar shortcut: any pair of accesses to a scalar is loop-carried but the
122
    // distance vector is unrepresentable.
123
    auto& type = sdfg.type(previous.container());
5,574✔
124
    if (dynamic_cast<const types::Scalar*>(&type)) {
5,574✔
125
        return symbolic::maps::DependenceDeltas{false, "", {}};
3,514✔
126
    }
3,514✔
127

128
    if (is_undefined_user(previous) || is_undefined_user(current)) {
2,060✔
129
        return empty_result;
4✔
130
    }
4✔
131

132
    // Use MLA-delinearized subsets when available so ISL can reason precisely
133
    // about multi-dimensional pointer accesses.
134
    auto& mla = analysis_manager.get<analysis::MemoryLayoutAnalysis>();
2,056✔
135
    auto previous_subsets = collect_subsets(previous, mla);
2,056✔
136
    auto current_subsets = collect_subsets(current, mla);
2,056✔
137

138
    auto previous_scope = Users::scope(&previous);
2,056✔
139
    auto previous_assumptions = assumptions_analysis.get(*previous_scope, true);
2,056✔
140
    auto current_scope = Users::scope(&current);
2,056✔
141
    auto current_assumptions = assumptions_analysis.get(*current_scope, true);
2,056✔
142

143
    // Mark loop's indvar and all nested loop indvars as non-constant from this
144
    // loop's perspective.
145
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();
2,056✔
146
    if (previous_assumptions.find(loop.indvar()) != previous_assumptions.end()) {
2,056✔
147
        previous_assumptions.at(loop.indvar()).constant(false);
2,056✔
148
    }
2,056✔
149
    if (current_assumptions.find(loop.indvar()) != current_assumptions.end()) {
2,056✔
150
        current_assumptions.at(loop.indvar()).constant(false);
2,056✔
151
    }
2,056✔
152
    for (auto& inner_loop : loop_analysis.descendants(&loop)) {
5,142✔
153
        if (auto structured_loop = dynamic_cast<const structured_control_flow::StructuredLoop*>(inner_loop)) {
5,142✔
154
            auto indvar = structured_loop->indvar();
5,142✔
155
            if (previous_assumptions.find(indvar) != previous_assumptions.end()) {
5,142✔
156
                previous_assumptions.at(indvar).constant(false);
5,142✔
157
            }
5,142✔
158
            if (current_assumptions.find(indvar) != current_assumptions.end()) {
5,142✔
159
                current_assumptions.at(indvar).constant(false);
5,142✔
160
            }
5,142✔
161
        }
5,142✔
162
    }
5,142✔
163

164
    // Collect deltas across all subset pairs and union them.
165
    symbolic::AssumptionsBounds previous_bounds(previous_assumptions);
2,056✔
166
    symbolic::AssumptionsBounds current_bounds(current_assumptions);
2,056✔
167

168
    isl_ctx* union_ctx = nullptr;
2,056✔
169
    isl_set* accumulated = nullptr;
2,056✔
170
    std::vector<std::string> result_dimensions;
2,056✔
171

172
    for (auto& previous_subset : previous_subsets) {
2,056✔
173
        for (auto& current_subset : current_subsets) {
2,107✔
174
            auto deltas = symbolic::maps::
2,107✔
175
                dependence_deltas(previous_subset, current_subset, loop.indvar(), previous_bounds, current_bounds);
2,107✔
176
            if (deltas.empty) {
2,107✔
177
                continue;
1,438✔
178
            }
1,438✔
179
            if (deltas.deltas_str.empty()) {
669✔
180
                if (accumulated) {
77✔
181
                    isl_set_free(accumulated);
×
182
                    isl_ctx_free(union_ctx);
×
183
                }
×
184
                return symbolic::maps::DependenceDeltas{false, "", {}};
77✔
185
            }
77✔
186
            if (!union_ctx) {
592✔
187
                union_ctx = isl_ctx_alloc();
572✔
188
                isl_options_set_on_error(union_ctx, ISL_ON_ERROR_CONTINUE);
572✔
189
                accumulated = isl_set_read_from_str(union_ctx, deltas.deltas_str.c_str());
572✔
190
                result_dimensions = deltas.dimensions;
572✔
191
            } else {
572✔
192
                isl_set* new_set = isl_set_read_from_str(union_ctx, deltas.deltas_str.c_str());
20✔
193
                if (new_set && accumulated) {
20✔
194
                    accumulated = isl_set_union(accumulated, new_set);
20✔
195
                } else if (new_set) {
20✔
196
                    isl_set_free(new_set);
×
197
                }
×
198
            }
20✔
199
        }
592✔
200
    }
2,056✔
201

202
    if (!accumulated) {
1,979✔
203
        if (union_ctx) {
1,407✔
204
            isl_ctx_free(union_ctx);
×
205
        }
×
206
        return empty_result;
1,407✔
207
    }
1,407✔
208

209
    char* str = isl_set_to_str(accumulated);
572✔
210
    if (!str) {
572✔
211
        isl_set_free(accumulated);
×
212
        isl_ctx_free(union_ctx);
×
213
        return symbolic::maps::DependenceDeltas{false, "", {}};
×
214
    }
×
215
    std::string union_str(str);
572✔
216
    free(str);
572✔
217

218
    isl_set_free(accumulated);
572✔
219
    isl_ctx_free(union_ctx);
572✔
220

221
    return symbolic::maps::DependenceDeltas{false, union_str, result_dimensions};
572✔
222
}
572✔
223

224
void merge_deltas(LoopCarriedDependencyInfo& info, const symbolic::maps::DependenceDeltas& add) {
115✔
225
    if (add.empty) return;
115✔
226
    if (info.deltas.empty) {
115✔
227
        info.deltas = add;
×
228
        return;
×
229
    }
×
230
    if (add.deltas_str.empty() || info.deltas.deltas_str.empty()) {
115✔
231
        info.deltas.empty = false;
62✔
232
        return;
62✔
233
    }
62✔
234
    isl_ctx* ctx = isl_ctx_alloc();
53✔
235
    isl_options_set_on_error(ctx, ISL_ON_ERROR_CONTINUE);
53✔
236
    isl_set* s1 = isl_set_read_from_str(ctx, info.deltas.deltas_str.c_str());
53✔
237
    isl_set* s2 = isl_set_read_from_str(ctx, add.deltas_str.c_str());
53✔
238
    if (s1 && s2) {
53✔
239
        isl_set* u = isl_set_union(s1, s2);
53✔
240
        char* str = u ? isl_set_to_str(u) : nullptr;
53✔
241
        if (str) {
53✔
242
            info.deltas.deltas_str = std::string(str);
20✔
243
            free(str);
20✔
244
        } else {
33✔
245
            info.deltas.deltas_str = "";
33✔
246
            info.deltas.dimensions.clear();
33✔
247
        }
33✔
248
        if (u) isl_set_free(u);
53✔
249
    } else {
53✔
250
        if (s1) isl_set_free(s1);
×
251
        if (s2) isl_set_free(s2);
×
252
        info.deltas.deltas_str = "";
×
253
        info.deltas.dimensions.clear();
×
254
    }
×
255
    isl_ctx_free(ctx);
53✔
256
}
53✔
257

258
bool user_in_subtree(User& user, const structured_control_flow::ControlFlowNode& subtree) {
×
259
    if (user.element() == nullptr) {
×
260
        return false;
×
261
    }
×
262
    auto* scope = Users::scope(&user);
×
263
    while (scope != nullptr) {
×
264
        if (scope == &subtree) {
×
265
            return true;
×
266
        }
×
267
        scope = scope->get_parent();
×
268
    }
×
269
    return false;
×
270
}
×
271

272
// Map an associative/commutative combine node (scalar tasklet or CMath library
273
// node) to its reduction operator. Returns nullopt for any node that is not a
274
// recognized reorderable reduction operator.
275
std::optional<structured_control_flow::ReductionOperation> combine_operator(const data_flow::DataFlowNode& node) {
128✔
276
    using structured_control_flow::ReductionOperation;
128✔
277
    if (auto* tasklet = dynamic_cast<const data_flow::Tasklet*>(&node)) {
128✔
278
        switch (tasklet->code()) {
123✔
279
            case data_flow::TaskletCode::fp_add:
47✔
280
            case data_flow::TaskletCode::int_add:
48✔
281
                return ReductionOperation::Add;
48✔
282
            case data_flow::TaskletCode::fp_fma:
46✔
283
                // Fused multiply-add `_out = _in0 * _in1 + _in2` is an additive
284
                // reduction over its addend operand.
285
                return ReductionOperation::Add;
46✔
286
            case data_flow::TaskletCode::fp_mul:
4✔
287
            case data_flow::TaskletCode::int_mul:
4✔
288
                return ReductionOperation::Mul;
4✔
289
            case data_flow::TaskletCode::int_smin:
1✔
290
            case data_flow::TaskletCode::int_umin:
1✔
291
                return ReductionOperation::Min;
1✔
292
            case data_flow::TaskletCode::int_smax:
1✔
293
            case data_flow::TaskletCode::int_umax:
1✔
294
                return ReductionOperation::Max;
1✔
295
            default:
23✔
296
                return std::nullopt;
23✔
297
        }
123✔
298
    }
123✔
299
    if (auto* cmath = dynamic_cast<const math::cmath::CMathNode*>(&node)) {
5✔
300
        switch (cmath->function()) {
5✔
301
            case math::cmath::CMathFunction::fmax:
2✔
302
                return ReductionOperation::Max;
2✔
NEW
303
            case math::cmath::CMathFunction::fmin:
×
NEW
304
                return ReductionOperation::Min;
×
305
            case math::cmath::CMathFunction::fma:
1✔
306
                // `fma(x, y, z) = x * y + z`: additive reduction over the addend
307
                // operand `z`
308
                return ReductionOperation::Add;
1✔
309
            default:
2✔
310
                return std::nullopt;
2✔
311
        }
5✔
312
    }
5✔
NEW
313
    return std::nullopt;
×
314
}
5✔
315

316
std::optional<std::string> fma_addend_connector(const data_flow::DataFlowNode& node) {
103✔
317
    if (auto* tasklet = dynamic_cast<const data_flow::Tasklet*>(&node)) {
103✔
318
        if (tasklet->code() == data_flow::TaskletCode::fp_fma) {
100✔
319
            return tasklet->inputs().at(2);
46✔
320
        }
46✔
321
    }
100✔
322
    if (auto* cmath = dynamic_cast<const math::cmath::CMathNode*>(&node)) {
57✔
323
        if (cmath->function() == math::cmath::CMathFunction::fma) {
3✔
324
            return cmath->inputs().at(2);
1✔
325
        }
1✔
326
    }
3✔
327
    return std::nullopt;
56✔
328
}
57✔
329

330
// Collect the dataflow Blocks directly belonging to `node`'s subtree, descending
331
// through Sequences and IfElse branches but NOT into nested loops (those have
332
// their own loop-carried analysis). Used to scan a loop body for the combine.
333
void collect_body_blocks(
334
    const structured_control_flow::ControlFlowNode& node, std::vector<const structured_control_flow::Block*>& out
335
) {
1,535✔
336
    if (auto* block = dynamic_cast<const structured_control_flow::Block*>(&node)) {
1,535✔
337
        out.push_back(block);
504✔
338
    } else if (auto* seq = dynamic_cast<const structured_control_flow::Sequence*>(&node)) {
1,031✔
339
        for (size_t i = 0; i < seq->size(); i++) {
1,535✔
340
            collect_body_blocks(seq->at(i).first, out);
910✔
341
        }
910✔
342
    } else if (auto* ifelse = dynamic_cast<const structured_control_flow::IfElse*>(&node)) {
625✔
343
        for (size_t i = 0; i < ifelse->size(); i++) {
13✔
344
            collect_body_blocks(ifelse->at(i).first, out);
8✔
345
        }
8✔
346
    }
5✔
347
    // Stop at nested StructuredLoop / While: their bodies belong to other loops.
348
}
1,535✔
349

350
// True if the accumulator subset is invariant across the reduction loop: the
351
// induction variable does not appear in any index expression. This is a sound
352
// (conservative) guard — an accumulator whose address moves with the induction
353
// variable is not a single-location reduction. The actual *equality* of the
354
// written and read-back addresses is proven separately and precisely by
355
// `symbolic::polyhedral::equal_on_domain`.
356
bool address_invariant_in_indvar(const data_flow::Subset& subset, const symbolic::Symbol& indvar) {
102✔
357
    for (auto& dim : subset) {
105✔
358
        for (auto& atom : symbolic::atoms(dim)) {
111✔
359
            if (atom->get_name() == indvar->get_name()) {
111✔
360
                return false;
3✔
361
            }
3✔
362
        }
111✔
363
    }
105✔
364
    return true;
99✔
365
}
102✔
366

367
} // namespace
368

369
void LoopCarriedDependencyAnalysis::run(analysis::AnalysisManager& analysis_manager) {
178✔
370
    dependencies_.clear();
178✔
371
    pairs_.clear();
178✔
372

373
    // Build a fresh branch-condition-aware assumptions analysis. The
374
    // manager-cached `AssumptionsAnalysis` deliberately skips IfElse-branch
375
    // refinement to stay cheap; LCDA needs the refined coupled constraints
376
    // so that `dependence_deltas`'s ISL formulation can prove halo-style
377
    // patterns are non-loop-carried.
378
    detailed_assumptions_ = std::make_unique<AssumptionsAnalysis>(this->sdfg_, /*with_branch_conditions=*/true);
178✔
379
    detailed_assumptions_->run(analysis_manager);
178✔
380

381
    // Drive entirely from DDA's reaching-definitions scaffold:
382
    //   - DDA computes per-loop boundary snapshots (upward-exposed reads,
383
    //     escaping definitions) — its primary job.
384
    //   - LCDA enumerates the cross-iteration pair space and computes delta
385
    //     sets via `pair_deltas` (using `symbolic::maps::dependence_deltas`).
386
    //
387
    // For a structured loop L with indvar i_L:
388
    //   pairs(L) = { (W,R, RAW, Δ_L(W,R)) : W ∈ esc(L), R ∈ ue(L),
389
    //                                       cont(W) = cont(R), Δ ≠ ∅ }
390
    //            ∪ { (W₁,W₂, WAW, Δ_L(W₁,W₂)) : W₁,W₂ ∈ esc(L),
391
    //                                           cont(W₁) = cont(W₂), Δ ≠ ∅ }
392
    auto& dda = detailed_dda();
178✔
393
    dda.run(analysis_manager);
178✔
394
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();
178✔
395

396
    for (auto* loop_node : loop_analysis.loops()) {
617✔
397
        auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(loop_node);
617✔
398
        if (loop == nullptr) {
617✔
399
            continue;
×
400
        }
×
401

402
        // Restrict to loops within the analysis scope (`node_`).
403
        bool in_scope = false;
617✔
404
        structured_control_flow::ControlFlowNode* cur = loop;
617✔
405
        while (cur != nullptr) {
2,724✔
406
            if (cur == &node_) {
2,724✔
407
                in_scope = true;
617✔
408
                break;
617✔
409
            }
617✔
410
            cur = cur->get_parent();
2,107✔
411
        }
2,107✔
412
        if (!in_scope) {
617✔
413
            continue;
×
414
        }
×
415

416
        // Non-monotonic / unanalyzable loops: don't register; consumers must
417
        // treat absence as "no info" and fall back to safe defaults.
418
        if (!loop->is_monotonic()) {
617✔
419
            continue;
×
420
        }
×
421
        if (!dda.has_loop_boundary(*loop)) {
617✔
422
            continue;
×
423
        }
×
424

425
        auto& ue_reads = dda.upward_exposed_reads(*loop);
617✔
426
        auto& esc_defs = dda.escaping_definitions(*loop);
617✔
427

428
        auto& deps = dependencies_[loop];
617✔
429
        auto& pair_list = pairs_[loop];
617✔
430

431
        // RAW: escaping_writes × upward_exposed_reads
432
        for (auto& write_entry : esc_defs) {
2,774✔
433
            auto* write = write_entry.first;
2,774✔
434
            for (auto* read : ue_reads) {
44,037✔
435
                if (write->container() != read->container()) {
44,037✔
436
                    continue;
43,103✔
437
                }
43,103✔
438
                auto deltas = pair_deltas(this->sdfg_, *write, *read, analysis_manager, *detailed_assumptions_, *loop);
934✔
439
                if (deltas.empty) continue;
934✔
440
                pair_list.push_back(LoopCarriedDependencyPair{write, read, LOOP_CARRIED_DEPENDENCY_READ_WRITE, deltas});
380✔
441
                auto it = deps.find(read->container());
380✔
442
                if (it == deps.end()) {
380✔
443
                    deps[read->container()] = LoopCarriedDependencyInfo{LOOP_CARRIED_DEPENDENCY_READ_WRITE, deltas};
265✔
444
                } else {
265✔
445
                    it->second.type = LOOP_CARRIED_DEPENDENCY_READ_WRITE;
115✔
446
                    merge_deltas(it->second, deltas);
115✔
447
                }
115✔
448
            }
380✔
449
        }
2,774✔
450

451
        // WAW: escaping_writes × escaping_writes (ordered pairs incl. self)
452
        for (auto& w1_entry : esc_defs) {
2,774✔
453
            auto* w1 = w1_entry.first;
2,774✔
454
            for (auto& w2_entry : esc_defs) {
26,246✔
455
                auto* w2 = w2_entry.first;
26,246✔
456
                if (w1->container() != w2->container()) {
26,246✔
457
                    continue;
21,606✔
458
                }
21,606✔
459
                auto deltas = pair_deltas(this->sdfg_, *w1, *w2, analysis_manager, *detailed_assumptions_, *loop);
4,640✔
460
                if (deltas.empty) continue;
4,640✔
461
                pair_list.push_back(LoopCarriedDependencyPair{w1, w2, LOOP_CARRIED_DEPENDENCY_WRITE_WRITE, deltas});
3,783✔
462
                if (deps.find(w1->container()) == deps.end()) {
3,783✔
463
                    deps[w1->container()] = LoopCarriedDependencyInfo{LOOP_CARRIED_DEPENDENCY_WRITE_WRITE, deltas};
1,172✔
464
                }
1,172✔
465
            }
3,783✔
466
        }
2,774✔
467

468
        detect_reductions(*loop);
617✔
469
    }
617✔
470
}
178✔
471

472
void LoopCarriedDependencyAnalysis::detect_reductions(structured_control_flow::StructuredLoop& loop) {
617✔
473
    // Ensure an entry exists for every analyzed loop so queries on reduction-free
474
    // loops return an empty list rather than asserting.
475
    auto& result = reductions_[&loop];
617✔
476

477
    auto dep_it = dependencies_.find(&loop);
617✔
478
    if (dep_it == dependencies_.end()) {
617✔
NEW
479
        return;
×
NEW
480
    }
×
481
    auto& deps = dep_it->second;
617✔
482

483
    std::vector<const structured_control_flow::Block*> blocks;
617✔
484
    collect_body_blocks(loop.root(), blocks);
617✔
485

486
    auto indvar = loop.indvar();
617✔
487

488
    // Assumptions for the loop body, used by the polyhedral equality test. They
489
    // must be taken from the loop body (`loop.root()`), not the loop node, so
490
    // that the induction variable's own bounds (e.g. 0 <= i < N) are present in
491
    // the domain. The induction variable must additionally be treated as
492
    // evolving (a domain dimension) rather than a constant parameter so that
493
    // shifted accesses such as A[i] vs A[i-1] are correctly rejected.
494
    symbolic::Assumptions assums = detailed_assumptions_->get(loop.root(), true);
617✔
495
    auto indvar_it = assums.find(indvar);
617✔
496
    if (indvar_it != assums.end()) {
617✔
497
        indvar_it->second.constant(false);
617✔
498
    }
617✔
499

500
    // Candidate accumulator -> operator. A container is rejected (and stays out
501
    // of the result) if any of its writes in the body is not a clean combine.
502
    std::map<std::string, structured_control_flow::ReductionOperation> found;
617✔
503
    std::set<std::string> rejected;
617✔
504

505
    for (auto* block : blocks) {
617✔
506
        auto& graph = block->dataflow();
504✔
507
        for (auto& node : graph.nodes()) {
1,948✔
508
            auto* write_node = dynamic_cast<const data_flow::AccessNode*>(&node);
1,948✔
509
            if (write_node == nullptr) {
1,948✔
510
                continue;
538✔
511
            }
538✔
512

513
            std::vector<const data_flow::Memlet*> in_edges;
1,410✔
514
            for (auto& edge : graph.in_edges(*write_node)) {
1,410✔
515
                in_edges.push_back(&edge);
539✔
516
            }
539✔
517
            if (in_edges.empty()) {
1,410✔
518
                continue; // not written in this block
871✔
519
            }
871✔
520

521
            const std::string& container = write_node->data();
539✔
522

523
            // Only loop-carried read-write containers can be reductions.
524
            auto ci = deps.find(container);
539✔
525
            if (ci == deps.end() || ci->second.type != LOOP_CARRIED_DEPENDENCY_READ_WRITE) {
539✔
526
                continue;
411✔
527
            }
411✔
528

529
            // A reduction accumulator is produced by exactly one combine node.
530
            if (in_edges.size() != 1) {
128✔
NEW
531
                rejected.insert(container);
×
NEW
532
                continue;
×
NEW
533
            }
×
534
            auto& write_edge = *in_edges.front();
128✔
535
            auto& combine = write_edge.src();
128✔
536

537
            auto op = combine_operator(combine);
128✔
538
            if (!op.has_value()) {
128✔
539
                rejected.insert(container);
25✔
540
                continue;
25✔
541
            }
25✔
542

543
            // The combine must read the same accumulator back (acc = acc OP x).
544
            // For a fused multiply-add the accumulator must be the addend
545
            // operand; if it feeds a multiplicand the update is acc = acc * b + c,
546
            // which is not a reorderable reduction and is rejected.
547
            auto fma_addend = fma_addend_connector(combine);
103✔
548
            const data_flow::Memlet* read_edge = nullptr;
103✔
549
            bool acc_on_non_addend = false;
103✔
550
            for (auto& edge : graph.in_edges(combine)) {
212✔
551
                auto* read_node = dynamic_cast<const data_flow::AccessNode*>(&edge.src());
212✔
552
                if (read_node == nullptr || read_node->data() != container) {
212✔
553
                    continue;
109✔
554
                }
109✔
555
                if (fma_addend.has_value() && edge.dst_conn() != fma_addend.value()) {
103✔
556
                    // Accumulator feeds a multiplicand of the fma.
557
                    acc_on_non_addend = true;
1✔
558
                    continue;
1✔
559
                }
1✔
560
                read_edge = &edge;
102✔
561
                if (!fma_addend.has_value()) {
102✔
562
                    break;
56✔
563
                }
56✔
564
            }
102✔
565
            if (read_edge == nullptr || acc_on_non_addend) {
103✔
566
                rejected.insert(container);
1✔
567
                continue;
1✔
568
            }
1✔
569

570
            // The accumulator must address one fixed, loop-invariant location
571
            // and the written cell must equal the read-back cell. `equal_on_domain`
572
            // proves the latter precisely (parametric/linearized affine forms),
573
            // while the invariance guard rejects accumulators that move with the
574
            // induction variable.
575
            if (!address_invariant_in_indvar(write_edge.subset(), indvar) ||
102✔
576
                !symbolic::polyhedral::equal_on_domain(write_edge.subset(), read_edge->subset(), indvar, assums)) {
102✔
577
                rejected.insert(container);
4✔
578
                continue;
4✔
579
            }
4✔
580

581
            auto existing = found.find(container);
98✔
582
            if (existing != found.end() && existing->second != op.value()) {
98✔
583
                rejected.insert(container);
1✔
584
                continue;
1✔
585
            }
1✔
586
            found[container] = op.value();
97✔
587
        }
97✔
588
    }
504✔
589

590
    for (auto& entry : found) {
617✔
591
        if (rejected.count(entry.first) != 0) {
97✔
592
            continue;
1✔
593
        }
1✔
594
        result.push_back(structured_control_flow::ReductionInfo{entry.second, entry.first});
96✔
595
    }
96✔
596
}
617✔
597

598
bool LoopCarriedDependencyAnalysis::available(structured_control_flow::StructuredLoop& loop) const {
192✔
599
    return pairs_.find(&loop) != pairs_.end();
192✔
600
}
192✔
601

602
const std::unordered_map<std::string, LoopCarriedDependencyInfo>& LoopCarriedDependencyAnalysis::
603
    dependencies(structured_control_flow::StructuredLoop& loop) const {
342✔
604
    auto it = dependencies_.find(&loop);
342✔
605
    assert(it != dependencies_.end() && "LoopCarriedDependencyAnalysis: loop not analyzed");
342✔
606
    return it->second;
342✔
607
}
342✔
608

609
const std::vector<LoopCarriedDependencyPair>& LoopCarriedDependencyAnalysis::pairs(structured_control_flow::StructuredLoop&
610
                                                                                       loop) const {
126✔
611
    auto it = pairs_.find(&loop);
126✔
612
    assert(it != pairs_.end() && "LoopCarriedDependencyAnalysis: loop not analyzed");
126✔
613
    return it->second;
126✔
614
}
126✔
615

616
std::vector<const LoopCarriedDependencyPair*> LoopCarriedDependencyAnalysis::pairs_between(
617
    structured_control_flow::StructuredLoop& loop,
618
    const structured_control_flow::ControlFlowNode& subtree_a,
619
    const structured_control_flow::ControlFlowNode& subtree_b,
620
    analysis::AnalysisManager& analysis_manager
621
) const {
×
622
    std::vector<const LoopCarriedDependencyPair*> result;
×
623
    auto it = pairs_.find(&loop);
×
624
    if (it == pairs_.end()) {
×
625
        return result;
×
626
    }
×
627

628
    for (auto& pair : it->second) {
×
629
        bool wa = user_in_subtree(*pair.writer, subtree_a);
×
630
        bool wb = user_in_subtree(*pair.writer, subtree_b);
×
631
        bool ra = user_in_subtree(*pair.reader, subtree_a);
×
632
        bool rb = user_in_subtree(*pair.reader, subtree_b);
×
633

634
        if ((wa && rb) || (wb && ra)) {
×
635
            result.push_back(&pair);
×
636
        }
×
637
    }
×
638
    return result;
×
639
}
×
640

641
bool LoopCarriedDependencyAnalysis::has_loop_carried(structured_control_flow::StructuredLoop& loop) const {
2✔
642
    auto it = pairs_.find(&loop);
2✔
643
    if (it == pairs_.end()) return false;
2✔
644
    return !it->second.empty();
2✔
645
}
2✔
646

647
bool LoopCarriedDependencyAnalysis::has_loop_carried_raw(structured_control_flow::StructuredLoop& loop) const {
5✔
648
    auto it = pairs_.find(&loop);
5✔
649
    if (it == pairs_.end()) return false;
5✔
650
    for (auto& p : it->second) {
5✔
651
        if (p.type == LOOP_CARRIED_DEPENDENCY_READ_WRITE) return true;
5✔
652
    }
5✔
653
    return false;
1✔
654
}
5✔
655

656
bool LoopCarriedDependencyAnalysis::has_loop_carried_hazard(structured_control_flow::StructuredLoop& loop) const {
166✔
657
    auto it = pairs_.find(&loop);
166✔
658
    if (it == pairs_.end()) return false;
166✔
659
    for (auto& p : it->second) {
365✔
660
        if (p.type != LOOP_CARRIED_DEPENDENCY_WRITE_WRITE) return true;
365✔
661
    }
365✔
662
    return false;
77✔
663
}
166✔
664

665
const std::vector<structured_control_flow::ReductionInfo>& LoopCarriedDependencyAnalysis::
666
    reductions(structured_control_flow::StructuredLoop& loop) const {
208✔
667
    auto it = reductions_.find(&loop);
208✔
668
    assert(it != reductions_.end() && "LoopCarriedDependencyAnalysis: loop not analyzed");
208✔
669
    return it->second;
208✔
670
}
208✔
671

672
bool LoopCarriedDependencyAnalysis::has_reductions(structured_control_flow::StructuredLoop& loop) const {
21✔
673
    auto it = reductions_.find(&loop);
21✔
674
    return it != reductions_.end() && !it->second.empty();
21✔
675
}
21✔
676

677
bool LoopCarriedDependencyAnalysis::is_reduction_only(structured_control_flow::StructuredLoop& loop) const {
186✔
678
    auto rit = reductions_.find(&loop);
186✔
679
    if (rit == reductions_.end() || rit->second.empty()) {
186✔
680
        return false;
147✔
681
    }
147✔
682
    std::set<std::string> reduction_containers;
39✔
683
    for (auto& reduction : rit->second) {
43✔
684
        reduction_containers.insert(reduction.container);
43✔
685
    }
43✔
686

687
    auto pit = pairs_.find(&loop);
39✔
688
    if (pit == pairs_.end()) {
39✔
NEW
689
        return false;
×
NEW
690
    }
×
691
    for (auto& pair : pit->second) {
93✔
692
        if (pair.type == LOOP_CARRIED_DEPENDENCY_WRITE_WRITE) {
93✔
693
            continue;
50✔
694
        }
50✔
695
        if (reduction_containers.count(pair.writer->container()) == 0) {
43✔
696
            return false;
1✔
697
        }
1✔
698
    }
43✔
699
    return true;
38✔
700
}
39✔
701

702
} // namespace analysis
703
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc