• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28158800507

25 Jun 2026 08:57AM UTC coverage: 61.644% (+0.06%) from 61.582%
28158800507

push

github

web-flow
MapFusionByDomain (#771)

 + New Map fusion caches data about iteration domain and map candidates
 + only matches up iteration domain exactly, per loop level.
 + Can support fusing non-leaf stacks of loops (stack ends where the shallower stack stops being perfectly nested & parallel)
 + new Element::replace for bulk replacements
 + New PatternMatcher visitor supports descending into replaced or modified nodes to allow for single-pass nested loop fusings
 + LoopAnalysis can now be kept up-to-date with changes done by Map-fusion
 + unit tests for the updating of LoopAnalysis
 * updated LoopAnalysis to be easier to keep up-to-date with changes. LoopTree is no longer ordered, if you want to iterate in pre-order, use the specific method for that
 + convenience StructuredSDFGBuilder.remove_from_parent()
 + RedundantLoadElim pass to skip reading from memory locations that have just been written (same block). Fusing no longer needs to do this
     RedundantLoadElimination does a simple check for other writes to the same structure. Can skip writes if redundant or not modify, if there are writes to different indices
* Updated verifiers to match new fusion
~ moved verifier checks behind correctness checks in npbench harness. It's more critical if we do not even get the expected results
* Added MapFusionByDomain also to loop-norm stage (currently inactive, causes more kernels that currently cannot be safely offloaded to CUDA).
---------

Co-authored-by: Lukas Truemper <lukas.truemper@outlook.de>

771 of 1186 new or added lines in 55 files covered. (65.01%)

6 existing lines in 6 files now uncovered.

38302 of 62134 relevant lines covered (61.64%)

987.24 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/sdfg/src/data_flow/library_nodes/math/tensor/broadcast_node.cpp
1
#include "sdfg/data_flow/library_nodes/math/tensor/broadcast_node.h"
2
#include "sdfg/builder/structured_sdfg_builder.h"
3
#include "sdfg/structured_control_flow/for.h"
4

5
namespace sdfg {
6
namespace math {
7
namespace tensor {
8

9
BroadcastNode::BroadcastNode(
10
    size_t element_id,
11
    const DebugInfo& debug_info,
12
    const graph::Vertex vertex,
13
    data_flow::DataFlowGraph& parent,
14
    const std::vector<symbolic::Expression>& input_shape,
15
    const std::vector<symbolic::Expression>& output_shape
16
)
17
    : TensorNode(
×
18
          element_id,
×
19
          debug_info,
×
20
          vertex,
×
21
          parent,
×
22
          LibraryNodeType_Broadcast,
×
23
          {"Y"},
×
24
          {"X"},
×
25
          data_flow::ImplementationType_NONE
×
26
      ),
×
27
      input_shape_(input_shape), output_shape_(output_shape) {}
×
28

29
void BroadcastNode::validate(const Function& function) const {
×
30
    TensorNode::validate(function);
×
31

32
    auto& graph = this->get_parent();
×
33

34
    auto& iedge = *graph.in_edges(*this).begin();
×
35
    auto& shape = static_cast<const types::Tensor&>(iedge.base_type());
×
36
    if (!shape.is_scalar()) {
×
37
        if (shape.shape().size() != this->input_shape_.size()) {
×
38
            throw InvalidSDFGException(
×
39
                "Library Node: Tensor shape must match node shape. Tensor shape: " +
×
40
                std::to_string(shape.shape().size()) + " Node shape: " + std::to_string(this->input_shape_.size())
×
41
            );
×
42
        }
×
43
        for (size_t i = 0; i < this->input_shape_.size(); ++i) {
×
44
            if (!symbolic::eq(shape.shape().at(i), this->input_shape_.at(i))) {
×
45
                throw InvalidSDFGException(
×
46
                    "Library Node: Tensor shape does not match expected shape. Tensor shape: " +
×
47
                    shape.shape().at(i)->__str__() + " Expected shape: " + this->input_shape_.at(i)->__str__()
×
48
                );
×
49
            }
×
50
        }
×
51
    }
×
52

53
    auto& oedge = *graph.out_edges(*this).begin();
×
54
    auto& output_shape = static_cast<const types::Tensor&>(oedge.base_type());
×
55
    if (output_shape.shape().size() != this->output_shape_.size()) {
×
56
        throw InvalidSDFGException(
×
57
            "Library Node: Output tensor shape must match node shape. Output tensor shape: " +
×
58
            std::to_string(output_shape.shape().size()) + " Node shape: " + std::to_string(this->output_shape_.size())
×
59
        );
×
60
    }
×
61

62
    for (size_t i = 0; i < this->output_shape_.size(); ++i) {
×
63
        if (!symbolic::eq(output_shape.shape().at(i), this->output_shape_.at(i))) {
×
64
            throw InvalidSDFGException(
×
65
                "Library Node: Output tensor shape does not match expected shape. Output tensor shape: " +
×
66
                output_shape.shape().at(i)->__str__() + " Expected shape: " + this->output_shape_.at(i)->__str__()
×
67
            );
×
68
        }
×
69
    }
×
70
}
×
71

72
/**
 * Collects the atoms of every extent expression of this node.
 *
 * @return A set holding each atom reported by symbolic::atoms for the
 *         dimensions of input_shape_ followed by those of output_shape_.
 */
symbolic::SymbolSet BroadcastNode::symbols() const {
    symbolic::SymbolSet collected;

    // Both shapes are handled the same way, so the traversal lives in one place.
    const auto gather_from = [&collected](const auto& extents) {
        for (const auto& extent : extents) {
            for (auto& atom : symbolic::atoms(extent)) {
                collected.insert(atom);
            }
        }
    };

    gather_from(input_shape_);
    gather_from(output_shape_);
    return collected;
}
×
86

87
/**
 * Rewrites every stored extent by substituting one expression for another.
 *
 * Each entry of input_shape_ and then of output_shape_ is overwritten with the
 * result of symbolic::subs on that entry.
 *
 * @param old_expression Expression to look for.
 * @param new_expression Expression substituted in its place.
 */
void BroadcastNode::replace(const symbolic::Expression old_expression, const symbolic::Expression new_expression) {
    const auto substitute_in = [&old_expression, &new_expression](auto& extents) {
        for (auto& extent : extents) {
            extent = symbolic::subs(extent, old_expression, new_expression);
        }
    };

    substitute_in(input_shape_);
    substitute_in(output_shape_);
}
×
95

NEW
96
/**
 * Bulk variant of replace: applies a whole substitution mapping at once.
 *
 * Each entry of input_shape_ and then of output_shape_ is overwritten with the
 * result of symbolic::subs(entry, replacements).
 *
 * @param replacements Mapping handed to symbolic::subs for every extent.
 */
void BroadcastNode::replace(const symbolic::ExpressionMapping& replacements) {
    const auto substitute_in = [&replacements](auto& extents) {
        for (auto& extent : extents) {
            extent = symbolic::subs(extent, replacements);
        }
    };

    substitute_in(input_shape_);
    substitute_in(output_shape_);
}
×
104

105
bool BroadcastNode::expand(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
×
106
    auto& dataflow = this->get_parent();
×
107
    auto& block = static_cast<structured_control_flow::Block&>(*dataflow.get_parent());
×
108

109
    if (dataflow.in_degree(*this) != 1 || dataflow.out_degree(*this) != 1) {
×
110
        return false;
×
111
    }
×
112

113
    auto& parent = static_cast<structured_control_flow::Sequence&>(*block.get_parent());
×
114

115
    auto& in_edge = *dataflow.in_edges(*this).begin();
×
116
    auto& out_edge = *dataflow.out_edges(*this).begin();
×
117
    auto& in_node = static_cast<data_flow::AccessNode&>(in_edge.src());
×
118
    auto& out_node = static_cast<data_flow::AccessNode&>(out_edge.dst());
×
119

120
    symbolic::MultiExpression loop_vars;
×
121
    structured_control_flow::Sequence* inner_scope = nullptr;
×
122

123
    for (size_t i = 0; i < output_shape_.size(); ++i) {
×
124
        std::string var_name = builder.find_new_name("_i" + std::to_string(i));
×
125
        builder.add_container(var_name, types::Scalar(types::PrimitiveType::Int64));
×
126

127
        auto sym_var = symbolic::symbol(var_name);
×
128
        auto condition = symbolic::Lt(sym_var, output_shape_[i]);
×
129
        auto init = symbolic::zero();
×
130
        auto update = symbolic::add(sym_var, symbolic::one());
×
131

132
        if (i == 0) {
×
133
            auto& loop = builder.add_map_before(
×
134
                parent,
×
135
                block,
×
136
                sym_var,
×
137
                condition,
×
138
                init,
×
139
                update,
×
140
                structured_control_flow::ScheduleType_Sequential::create(),
×
141
                {},
×
142
                this->debug_info()
×
143
            );
×
144
            inner_scope = &loop.root();
×
145
        } else {
×
146
            auto& loop = builder.add_map(
×
147
                *inner_scope,
×
148
                sym_var,
×
149
                condition,
×
150
                init,
×
151
                update,
×
152
                structured_control_flow::ScheduleType_Sequential::create(),
×
153
                {},
×
154
                this->debug_info()
×
155
            );
×
156
            inner_scope = &loop.root();
×
157
        }
×
158
        loop_vars.push_back(sym_var);
×
159
    }
×
160

161
    auto& tasklet_block = builder.add_block(*inner_scope, {}, this->debug_info());
×
162

163
    auto& in_acc = builder.add_access(tasklet_block, in_node.data());
×
164
    auto& out_acc = builder.add_access(tasklet_block, out_node.data());
×
165

166
    symbolic::MultiExpression input_subset = {};
×
167
    for (size_t i = 0; i < input_shape_.size(); ++i) {
×
168
        if (!symbolic::eq(input_shape_[i], symbolic::one())) {
×
169
            input_subset.push_back(loop_vars[i]);
×
170
        } else {
×
171
            input_subset.push_back(symbolic::zero());
×
172
        }
×
173
    }
×
174
    auto& iedge_tensor = static_cast<const types::Tensor&>(in_edge.base_type());
×
175
    if (iedge_tensor.is_scalar()) {
×
176
        input_subset = {};
×
177
    }
×
178

179
    auto& tasklet =
×
180
        builder.add_tasklet(tasklet_block, data_flow::TaskletCode::assign, "_out", {"_in"}, this->debug_info());
×
181

182
    builder.add_computational_memlet(
×
183
        tasklet_block, in_acc, tasklet, "_in", input_subset, in_edge.base_type(), this->debug_info()
×
184
    );
×
185
    builder.add_computational_memlet(
×
186
        tasklet_block, tasklet, "_out", out_acc, loop_vars, out_edge.base_type(), this->debug_info()
×
187
    );
×
188

189
    builder.remove_memlet(block, in_edge);
×
190
    builder.remove_memlet(block, out_edge);
×
191
    builder.remove_node(block, in_node);
×
192
    builder.remove_node(block, out_node);
×
193
    builder.remove_node(block, *this);
×
194

195
    int index = parent.index(block);
×
196
    builder.remove_child(parent, index);
×
197

198
    return true;
×
199
}
×
200

201
std::unique_ptr<data_flow::DataFlowNode> BroadcastNode::
202
    clone(size_t element_id, const graph::Vertex vertex, data_flow::DataFlowGraph& parent) const {
×
203
    return std::unique_ptr<data_flow::DataFlowNode>(
×
204
        new BroadcastNode(element_id, this->debug_info(), vertex, parent, input_shape_, output_shape_)
×
205
    );
×
206
}
×
207

208
} // namespace tensor
209
} // namespace math
210
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc