• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28158800507

25 Jun 2026 08:57AM UTC coverage: 61.644% (+0.06%) from 61.582%
28158800507

push

github

web-flow
MapFusionByDomain (#771)

 + New Map fusion caches data about iteration domain and map candidates
 + only matches up iteration domain exactly, per loop level.
 + Can support fusing non-leaf stacks of loops (stack ends where the shallower stack stops being perfectly nested & parallel)
 + new Element::replace for bulk replacements
 + New PatternMatcher visitor supports descending into replaced or modified nodes to allow for single-pass nested loop fusings
 + LoopAnalysis can now be kept up-to-date with changes done by Map-fusion
 + unit tests for the updating of LoopAnalysis
 * updated LoopAnalysis to be easier to keep up-to-date with changes. LoopTree is no longer ordered, if you want to iterate in pre-order, use the specific method for that
 + convenience StructuredSDFGBuilder.remove_from_parent()
 + RedundantLoadElim pass to skip reading from memory locations that have just been written (same block). Fusing no longer needs to do this
     RedundantLoadElimination does a simple check for other writes to the same structure. Can skip writes if redundant or not modify, if their are writes to different indices
* Updated verifiers to match new fusion
~ moved verifier checks behind correctness checks in npbench harness. Its more critical if we do not even get the expected results
* Added MapFusionByDomain also to loop-norm stage (currently inactive, causes more kernels that currently cannot be safely offloaded to CUDA.
---------

Co-authored-by: Lukas Truemper <lukas.truemper@outlook.de>

771 of 1186 new or added lines in 55 files covered. (65.01%)

6 existing lines in 6 files now uncovered.

38302 of 62134 relevant lines covered (61.64%)

987.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.91
/sdfg/src/data_flow/library_nodes/math/tensor/reduce_node.cpp
1
#include "sdfg/data_flow/library_nodes/math/tensor/reduce_node.h"
2

3
#include "sdfg/analysis/analysis.h"
4
#include "sdfg/builder/structured_sdfg_builder.h"
5

6
#include <algorithm>
7

8
namespace sdfg {
9
namespace math {
10
namespace tensor {
11

12
ReduceNode::ReduceNode(
13
    size_t element_id,
14
    const DebugInfo& debug_info,
15
    const graph::Vertex vertex,
16
    data_flow::DataFlowGraph& parent,
17
    const data_flow::LibraryNodeCode& code,
18
    const std::vector<symbolic::Expression>& shape,
19
    const std::vector<int64_t>& axes,
20
    bool keepdims
21
)
22
    : TensorNode(element_id, debug_info, vertex, parent, code, {}, {"Y", "X"}, data_flow::ImplementationType_NONE),
26✔
23
      shape_(shape), axes_(axes), keepdims_(keepdims) {}
26✔
24

25
void ReduceNode::validate(const Function& function) const {
12✔
26
    TensorNode::validate(function);
12✔
27

28
    auto& graph = this->get_parent();
12✔
29

30
    auto* iedge = graph.in_edge_for_connector(*this, inputs_.at(1));
12✔
31
    auto& tensor_input = static_cast<const types::Tensor&>(iedge->base_type());
12✔
32
    validate_shape_matches(shape_, tensor_input.layout(), "input");
12✔
33

34
    // Calculate expected output shape based on axes and keepdims
35
    std::vector<int64_t> sorted_axes = axes_;
12✔
36
    // Normalize negative axes
37
    for (auto& axis : sorted_axes) {
13✔
38
        if (axis < 0) {
13✔
39
            axis = static_cast<int64_t>(shape_.size()) + axis;
2✔
40
        }
2✔
41
        // Validate axis is in bounds
42
        if (axis < 0 || axis >= static_cast<int64_t>(shape_.size())) {
13✔
43
            throw InvalidSDFGException(
×
44
                "Library Node: Axis value out of bounds. Axis: " + std::to_string(axis) +
×
45
                " Shape size: " + std::to_string(shape_.size())
×
46
            );
×
47
        }
×
48
    }
13✔
49
    std::sort(sorted_axes.begin(), sorted_axes.end());
12✔
50

51
    std::vector<symbolic::Expression> expected_output_shape;
12✔
52
    for (size_t i = 0; i < shape_.size(); ++i) {
32✔
53
        bool is_axis = false;
20✔
54
        for (auto axis : sorted_axes) {
22✔
55
            if (axis == (int64_t) i) {
22✔
56
                is_axis = true;
13✔
57
                break;
13✔
58
            }
13✔
59
        }
22✔
60

61
        if (is_axis) {
20✔
62
            if (keepdims_) {
13✔
63
                expected_output_shape.push_back(symbolic::one());
1✔
64
            }
1✔
65
        } else {
13✔
66
            expected_output_shape.push_back(shape_[i]);
7✔
67
        }
7✔
68
    }
20✔
69

70
    auto* iedge_result = graph.in_edge_for_connector(*this, inputs_.at(0));
12✔
71
    auto& tensor_output = static_cast<const types::Tensor&>(iedge_result->base_type());
12✔
72
    validate_shape_matches(expected_output_shape, tensor_output.layout(), "output");
12✔
73
}
12✔
74

75

76
symbolic::SymbolSet ReduceNode::symbols() const {
×
77
    symbolic::SymbolSet syms;
×
78
    for (const auto& dim : shape_) {
×
79
        for (auto& atom : symbolic::atoms(dim)) {
×
80
            syms.insert(atom);
×
81
        }
×
82
    }
×
83
    return syms;
×
84
}
×
85

86
void ReduceNode::replace(const symbolic::Expression old_expression, const symbolic::Expression new_expression) {
×
87
    for (auto& dim : shape_) {
×
88
        dim = symbolic::subs(dim, old_expression, new_expression);
×
89
    }
×
90
}
×
91

NEW
92
void ReduceNode::replace(const symbolic::ExpressionMapping& replacements) {
×
NEW
93
    for (auto& dim : shape_) {
×
NEW
94
        dim = symbolic::subs(dim, replacements);
×
NEW
95
    }
×
NEW
96
}
×
97

98
data_flow::PointerAccessType ReduceNode::pointer_access_type(int input_idx) const {
×
99
    if (input_idx == 0) {
×
100
        return data_flow::PointerAccessMeta::create_full_write_only(symbolic::__nullptr__(), true);
×
101
    } else if (input_idx == 1) {
×
102
        return data_flow::PointerAccessMeta::create_read_only(symbolic::__nullptr__(), true);
×
103
    } else {
×
104
        return TensorNode::pointer_access_type(input_idx);
×
105
    }
×
106
}
×
107

108
bool ReduceNode::expand_inner(
109
    builder::StructuredSDFGBuilder& builder,
110
    analysis::AnalysisManager& analysis_manager,
111
    structured_control_flow::Block& block,
112
    data_flow::DataFlowGraph& dataflow,
113
    structured_control_flow::Sequence& parent,
114
    Transition& transition,
115
    const data_flow::Memlet* iedge_input,
116
    const data_flow::Memlet* iedge_result,
117
    const data_flow::AccessNode* input_node,
118
    const data_flow::AccessNode* output_node,
119
    const std::vector<symbolic::Expression>& output_shape,
120
    const std::vector<int64_t>& sorted_axes
121
) {
6✔
122
    auto org_idx = parent.index(block);
6✔
123

124
    sdfg::types::Scalar element_type(iedge_result->base_type().primitive_type());
6✔
125
    types::Tensor scalar_tensor(element_type.primitive_type(), {});
6✔
126

127
    // Add new sequence
128
    auto& new_sequence = builder.add_sequence_before(parent, block, transition.assignments(), block.debug_info());
6✔
129

130
    // 1. Initialization Loop
131
    {
6✔
132
        data_flow::Subset init_subset;
6✔
133
        structured_control_flow::Sequence* last_scope = &new_sequence;
6✔
134
        structured_control_flow::Map* last_map = nullptr;
6✔
135

136
        for (size_t i = 0; i < output_shape.size(); ++i) {
12✔
137
            std::string indvar_str = builder.find_new_name("_i_init");
6✔
138
            builder.add_container(indvar_str, types::Scalar(types::PrimitiveType::Int64));
6✔
139

140
            auto indvar = symbolic::symbol(indvar_str);
6✔
141
            auto init = symbolic::zero();
6✔
142
            auto update = symbolic::add(indvar, symbolic::one());
6✔
143
            auto condition = symbolic::Lt(indvar, output_shape[i]);
6✔
144

145
            last_map = &builder.add_map(
6✔
146
                *last_scope,
6✔
147
                indvar,
6✔
148
                condition,
6✔
149
                init,
6✔
150
                update,
6✔
151
                structured_control_flow::ScheduleType_Sequential::create(),
6✔
152
                {},
6✔
153
                block.debug_info()
6✔
154
            );
6✔
155
            last_scope = &last_map->root();
6✔
156
            init_subset.push_back(indvar);
6✔
157
        }
6✔
158

159
        // Add initialization tasklet
160
        auto& init_block = builder.add_block(*last_scope, {}, block.debug_info());
6✔
161
        auto& init_tasklet =
6✔
162
            builder.add_tasklet(init_block, data_flow::TaskletCode::assign, {"_out"}, {"_in"}, block.debug_info());
6✔
163

164
        auto& const_node =
6✔
165
            builder
6✔
166
                .add_constant(init_block, this->identity(element_type.primitive_type()), element_type, block.debug_info());
6✔
167
        auto& out_access = builder.add_access(init_block, output_node->data(), block.debug_info());
6✔
168

169
        builder
6✔
170
            .add_computational_memlet(init_block, const_node, init_tasklet, "_in", {}, scalar_tensor, block.debug_info());
6✔
171
        builder.add_computational_memlet(
6✔
172
            init_block, init_tasklet, "_out", out_access, init_subset, iedge_result->base_type(), block.debug_info()
6✔
173
        );
6✔
174
    }
6✔
175

176
    // 2. Reduction Loop
177
    {
6✔
178
        data_flow::Subset input_subset;
6✔
179
        data_flow::Subset output_subset;
6✔
180

181
        structured_control_flow::Sequence* last_scope = &new_sequence;
6✔
182
        structured_control_flow::StructuredLoop* last_loop = nullptr;
6✔
183

184
        std::map<size_t, symbolic::Expression> loop_vars_map;
6✔
185
        std::vector<size_t> outer_dims;
6✔
186
        std::vector<size_t> inner_dims;
6✔
187

188
        // Partition dimensions
189
        for (size_t i = 0; i < shape_.size(); ++i) {
18✔
190
            bool is_axis = false;
12✔
191
            for (auto axis : sorted_axes) {
14✔
192
                if (axis == (int64_t) i) {
14✔
193
                    is_axis = true;
7✔
194
                    break;
7✔
195
                }
7✔
196
            }
14✔
197
            if (is_axis) {
12✔
198
                inner_dims.push_back(i);
7✔
199
            } else {
7✔
200
                outer_dims.push_back(i);
5✔
201
            }
5✔
202
        }
12✔
203

204
        // Generate outer parallel loops (Maps)
205
        for (size_t dim_idx : outer_dims) {
6✔
206
            std::string indvar_str = builder.find_new_name("_i");
5✔
207
            builder.add_container(indvar_str, types::Scalar(types::PrimitiveType::Int64));
5✔
208

209
            auto indvar = symbolic::symbol(indvar_str);
5✔
210
            auto init = symbolic::zero();
5✔
211
            auto update = symbolic::add(indvar, symbolic::one());
5✔
212
            auto condition = symbolic::Lt(indvar, shape_[dim_idx]);
5✔
213

214
            auto& map = builder.add_map(
5✔
215
                *last_scope,
5✔
216
                indvar,
5✔
217
                condition,
5✔
218
                init,
5✔
219
                update,
5✔
220
                structured_control_flow::ScheduleType_Sequential::create(),
5✔
221
                {},
5✔
222
                block.debug_info()
5✔
223
            );
5✔
224
            last_scope = &map.root();
5✔
225
            loop_vars_map[dim_idx] = indvar;
5✔
226
        }
5✔
227

228
        // Generate inner sequential loops (Fors)
229
        for (size_t dim_idx : inner_dims) {
7✔
230
            std::string indvar_str = builder.find_new_name("_k");
7✔
231
            builder.add_container(indvar_str, types::Scalar(types::PrimitiveType::Int64));
7✔
232

233
            auto indvar = symbolic::symbol(indvar_str);
7✔
234
            auto init = symbolic::zero();
7✔
235
            auto update = symbolic::add(indvar, symbolic::one());
7✔
236
            auto condition = symbolic::Lt(indvar, shape_[dim_idx]);
7✔
237

238
            last_loop = &builder.add_for(*last_scope, indvar, condition, init, update, {}, block.debug_info());
7✔
239
            last_scope = &last_loop->root();
7✔
240
            loop_vars_map[dim_idx] = indvar;
7✔
241
        }
7✔
242

243
        // Construct output indices
244
        std::vector<symbolic::Expression> input_indices;
6✔
245
        std::vector<symbolic::Expression> output_indices;
6✔
246
        for (size_t i = 0; i < shape_.size(); ++i) {
18✔
247
            input_indices.push_back(loop_vars_map[i]);
12✔
248
            bool is_axis = false;
12✔
249
            for (auto axis : sorted_axes) {
14✔
250
                if (axis == (int64_t) i) {
14✔
251
                    is_axis = true;
7✔
252
                    break;
7✔
253
                }
7✔
254
            }
14✔
255

256
            if (is_axis) {
12✔
257
                if (keepdims_) {
7✔
258
                    output_indices.push_back(symbolic::zero());
1✔
259
                }
1✔
260
            } else {
7✔
261
                output_indices.push_back(loop_vars_map[i]);
5✔
262
            }
5✔
263
        }
12✔
264

265
        this->expand_reduction(
6✔
266
            builder,
6✔
267
            analysis_manager,
6✔
268
            *last_scope,
6✔
269
            input_node->data(),
6✔
270
            output_node->data(),
6✔
271
            static_cast<const types::Tensor&>(iedge_input->base_type()),
6✔
272
            static_cast<const types::Tensor&>(iedge_result->base_type()),
6✔
273
            input_indices,
6✔
274
            output_indices
6✔
275
        );
6✔
276
    }
6✔
277

278
    // Clean up block
279
    builder.clear_code_node_legacy(block, *this);
6✔
280
    // WARNING: this has been deallocated at this point!!
281
    builder.remove_child(parent, org_idx + 1);
6✔
282

283
    return true;
6✔
284
}
6✔
285

286
bool ReduceNode::expand(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
11✔
287
    auto& dataflow = this->get_parent();
11✔
288
    auto& block = static_cast<structured_control_flow::Block&>(*dataflow.get_parent());
11✔
289

290
    if (dataflow.in_degree(*this) != 2) {
11✔
291
        return false;
×
292
    }
×
293

294
    auto& parent = static_cast<structured_control_flow::Sequence&>(*block.get_parent());
11✔
295
    int index = parent.index(block);
11✔
296
    auto& transition = parent.at(index).second;
11✔
297

298
    auto* iedge_input = dataflow.in_edge_for_connector(*this, inputs_.at(1));
11✔
299
    auto* iedge_result = dataflow.in_edge_for_connector(*this, inputs_.at(0));
11✔
300

301

302
    auto* input_node = dataflow.find_standalone_entry(iedge_input);
11✔
303
    auto* output_node = dataflow.find_standalone_entry(iedge_result);
11✔
304

305
    if (!input_node || !output_node) {
11✔
306
        return false;
×
307
    }
×
308

309
    // Calculate output shape
310
    std::vector<symbolic::Expression> output_shape;
11✔
311
    std::vector<int64_t> sorted_axes = axes_;
11✔
312
    // Normalize negative axes
313
    for (auto& axis : sorted_axes) {
12✔
314
        if (axis < 0) {
12✔
315
            axis = static_cast<int64_t>(shape_.size()) + axis;
2✔
316
        }
2✔
317
        // Validate axis is in bounds
318
        if (axis < 0 || axis >= static_cast<int64_t>(shape_.size())) {
12✔
319
            throw InvalidSDFGException(
×
320
                "Library Node: Axis value out of bounds. Axis: " + std::to_string(axis) +
×
321
                " Shape size: " + std::to_string(shape_.size())
×
322
            );
×
323
        }
×
324
    }
12✔
325
    std::sort(sorted_axes.begin(), sorted_axes.end());
11✔
326

327
    for (size_t i = 0; i < shape_.size(); ++i) {
30✔
328
        bool is_axis = false;
19✔
329
        for (auto axis : sorted_axes) {
21✔
330
            if (axis == (int64_t) i) {
21✔
331
                is_axis = true;
12✔
332
                break;
12✔
333
            }
12✔
334
        }
21✔
335

336
        if (is_axis) {
19✔
337
            if (keepdims_) {
12✔
338
                output_shape.push_back(symbolic::one());
1✔
339
            }
1✔
340
        } else {
12✔
341
            output_shape.push_back(shape_[i]);
7✔
342
        }
7✔
343
    }
19✔
344

345

346
    return expand_inner(
11✔
347
        builder,
11✔
348
        analysis_manager,
11✔
349
        block,
11✔
350
        dataflow,
11✔
351
        parent,
11✔
352
        transition,
11✔
353
        iedge_input,
11✔
354
        iedge_result,
11✔
355
        input_node,
11✔
356
        output_node,
11✔
357
        output_shape,
11✔
358
        sorted_axes
11✔
359
    );
11✔
360
}
11✔
361

362
} // namespace tensor
363
} // namespace math
364
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc