27330290888

Committed 11 Jun 2026 07:12AM UTC coverage: 61.123% (+0.02%) from 61.108%

Build # 27330290888

Build Type

push

github

Committed by

web-flow

Commit Message

Structured ControlFlowNodes now keep a reference to their parent, no longer need ScopeAnalysis (#749)

* updated all current users of ScopeAnalysis to use the integrated parents instead
 + a few unit tests for parent relationship
 * refactored StructuredSDFGBuilder to reuse the same code to add ControlFlowNodes to the SDFG
 * generified mechanism for ControlFlowNode constructors to create further inner nodes (ex: Sequence inside Map) with element id reservation.
 - removed access to ScopeAnalysis from python bindings
 + access to parent reference in python bindings

Coverage Stats

177 of 225 new or added lines in 59 files covered. (78.67%)

12 existing lines in 6 files now uncovered.

35879 of 58700 relevant lines covered (61.12%)

750.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/sdfg/src/data_flow/library_nodes/math/tensor/reduce_ops/softmax_node.cpp

#include "sdfg/data_flow/library_nodes/math/tensor/reduce_ops/softmax_node.h"
#include "sdfg/builder/structured_sdfg_builder.h"
#include "sdfg/data_flow/library_nodes/math/tensor/broadcast_node.h"
#include "sdfg/data_flow/library_nodes/math/tensor/elementwise_ops/div_node.h"
#include "sdfg/data_flow/library_nodes/math/tensor/elementwise_ops/exp_node.h"
#include "sdfg/data_flow/library_nodes/math/tensor/elementwise_ops/sub_node.h"
#include "sdfg/data_flow/library_nodes/math/tensor/reduce_ops/max_node.h"
#include "sdfg/data_flow/library_nodes/math/tensor/reduce_ops/sum_node.h"
#include "sdfg/data_flow/library_nodes/stdlib/malloc.h"
#include "sdfg/structured_control_flow/block.h"
#include "sdfg/structured_control_flow/for.h"
#include "sdfg/types/pointer.h"
#include "sdfg/types/scalar.h"
#include "sdfg/types/utils.h"

namespace sdfg {
namespace math {
namespace tensor {

// Constructs a softmax library node.
//
// Every argument is forwarded unchanged to the ReduceNode base constructor,
// together with the LibraryNodeType_Softmax code.
//
// Throws InvalidSDFGException when `keepdims` is true.
SoftmaxNode::SoftmaxNode(
    size_t element_id,
    const DebugInfo& debug_info,
    const graph::Vertex vertex,
    data_flow::DataFlowGraph& parent,
    const std::vector<symbolic::Expression>& shape,
    const std::vector<int64_t>& axes,
    bool keepdims
)
    : ReduceNode(element_id, debug_info, vertex, parent, LibraryNodeType_Softmax, shape, axes, keepdims) {
    // Only keepdims == false is accepted; nothing else to set up in that case.
    if (!keepdims) {
        return;
    }

    throw InvalidSDFGException("Unsupported attribute on library node: softmax");
}

// Validation hook for this node. The implementation performs no checks.
// NOTE(review): the body is empty, so nothing is verified here — confirm
// that this is intended for softmax nodes.
void SoftmaxNode::validate(const Function& function) const {
    static_cast<void>(function); // parameter is not used
}

// Expands this softmax node into a chain of simpler tensor library nodes.
//
// New blocks are inserted before the block that holds this node, in order:
//   0. malloc of four temporaries (max, sub, exp, sum)
//   1. Max(X)                -> tmp_max
//   2. Sub(X, tmp_max)       -> tmp_sub   (tmp_max read with zero strides on the reduced axes)
//   3. Exp(tmp_sub)          -> tmp_exp
//   4. Sum(tmp_exp)          -> tmp_sum
//   5. Div(tmp_exp, tmp_sum) -> output    (tmp_sum read with zero strides on the reduced axes)
// Afterwards the original block (input access node, this node, output access
// node) is removed from its parent sequence.
//
// Parameters:
//   builder          - used to create containers, blocks, nodes and memlets.
//   analysis_manager - not used by this expansion.
//
// Returns true when the node was expanded. Returns false, without modifying
// the graph, when the node does not have exactly one in-edge and one out-edge,
// when either neighbour is not an AccessNode, or when those access nodes have
// additional edges.
//
// Throws InvalidSDFGException when an axis is out of bounds for the shape.
bool SoftmaxNode::expand(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    auto& dataflow = this->get_parent();
    auto& block = static_cast<structured_control_flow::Block&>(*dataflow.get_parent());

    if (dataflow.in_degree(*this) != 1 || dataflow.out_degree(*this) != 1) {
        return false;
    }

    auto& parent = static_cast<structured_control_flow::Sequence&>(*block.get_parent());

    auto& in_edge = *dataflow.in_edges(*this).begin();
    auto& out_edge = *dataflow.out_edges(*this).begin();

    // The expansion reads container names from both neighbours, so they must
    // really be access nodes; refuse to expand instead of downcasting blindly.
    auto* in_node_ptr = dynamic_cast<data_flow::AccessNode*>(&in_edge.src());
    auto* out_node_ptr = dynamic_cast<data_flow::AccessNode*>(&out_edge.dst());
    if (in_node_ptr == nullptr || out_node_ptr == nullptr) {
        return false;
    }
    auto& in_node = *in_node_ptr;
    auto& out_node = *out_node_ptr;

    // The whole block is removed at the end. Any other producer or consumer
    // attached to the two access nodes would be dropped with it, so bail out
    // before touching the graph.
    // NOTE(review): nodes in this block that are not connected to these access
    // nodes are not detected here — confirm whether callers guarantee that the
    // block contains only this node and its two access nodes.
    if (dataflow.in_degree(in_node) != 0 || dataflow.out_degree(in_node) != 1 ||
        dataflow.in_degree(out_node) != 1 || dataflow.out_degree(out_node) != 0) {
        return false;
    }

    // Normalize negative axes and validate that every axis is in bounds.
    std::vector<int64_t> normalized_axes = axes_;
    const auto rank = static_cast<int64_t>(shape_.size());
    for (auto& axis : normalized_axes) {
        if (axis < 0) {
            axis = rank + axis;
        }
        if (axis < 0 || axis >= rank) {
            throw InvalidSDFGException(
                "Library Node: Axis value out of bounds. Axis: " + std::to_string(axis) +
                " Shape size: " + std::to_string(shape_.size())
            );
        }
    }

    // Single membership test shared by the reduced-shape and stride computation.
    const auto is_reduced_axis = [&normalized_axes](size_t dim) {
        return std::find(normalized_axes.begin(), normalized_axes.end(), static_cast<int64_t>(dim)) !=
               normalized_axes.end();
    };

    // Reduced shape (for Max and Sum): reduced axes collapse to extent one.
    std::vector<symbolic::Expression> reduced_shape;
    reduced_shape.reserve(shape_.size());
    for (size_t i = 0; i < shape_.size(); ++i) {
        if (is_reduced_axis(i)) {
            reduced_shape.push_back(symbolic::one());
        } else {
            reduced_shape.push_back(shape_[i]);
        }
    }

    types::Scalar element_type(this->primitive_type(dataflow));
    types::Pointer pointer_type(element_type);

    // Type to store reduced results (max and sum)
    types::Tensor reduced_tensor_type(element_type, reduced_shape);

    // Type used to read the reduced results with the full shape: strides are
    // taken from reduced_tensor_type, but set to zero on the reduced axes.
    symbolic::MultiExpression reduced_strides = reduced_tensor_type.strides();
    symbolic::MultiExpression broadcast_strides;
    for (size_t i = 0; i < shape_.size(); ++i) {
        if (is_reduced_axis(i)) {
            broadcast_strides.push_back(symbolic::zero());
        } else {
            broadcast_strides.push_back(reduced_strides[i]);
        }
    }
    types::Tensor broadcast_tensor_type(element_type, shape_, broadcast_strides);

    // Temporary buffers
    std::string tmp_max_name = builder.find_new_name("_softmax_max");
    builder.add_container(tmp_max_name, pointer_type);

    std::string tmp_sub_name = builder.find_new_name("_softmax_sub");
    builder.add_container(tmp_sub_name, pointer_type);

    std::string tmp_exp_name = builder.find_new_name("_softmax_exp");
    builder.add_container(tmp_exp_name, pointer_type);

    std::string tmp_sum_name = builder.find_new_name("_softmax_sum");
    builder.add_container(tmp_sum_name, pointer_type);

    // Mallocs
    // NOTE(review): no matching deallocation of the four temporaries is emitted
    // in this function — confirm whether they are freed elsewhere.
    {
        symbolic::Expression bytes_elem = types::get_type_size(element_type, false);

        // Size in bytes of a buffer holding the full shape.
        symbolic::Expression bytes_full = bytes_elem;
        for (auto& dim : this->shape_) {
            bytes_full = symbolic::mul(dim, bytes_full);
        }

        // Size in bytes of a buffer holding the reduced shape.
        symbolic::Expression bytes_reduced = bytes_elem;
        for (auto& dim : reduced_shape) {
            bytes_reduced = symbolic::mul(dim, bytes_reduced);
        }

        auto& alloc_block = builder.add_block_before(parent, block, {}, this->debug_info());

        // Adds one malloc node to alloc_block and writes its result into `name`.
        auto malloc_helper = [&](const std::string& name, const symbolic::Expression& size) {
            auto& access = builder.add_access(alloc_block, name);
            auto& malloc_node = builder.add_library_node<stdlib::MallocNode>(alloc_block, this->debug_info(), size);
            builder
                .add_computational_memlet(alloc_block, malloc_node, "_ret", access, {}, pointer_type, this->debug_info());
        };

        malloc_helper(tmp_max_name, bytes_reduced);
        malloc_helper(tmp_sub_name, bytes_full);
        malloc_helper(tmp_exp_name, bytes_full);
        malloc_helper(tmp_sum_name, bytes_reduced);
    }

    // NOTE(review): the full-size temporaries below are accessed with
    // in_edge.base_type(), while their buffers are sized from shape_ — confirm
    // that the input type always describes a buffer of that size.

    // 1. Max(X) -> TmpMax
    {
        auto& max_block = builder.add_block_before(parent, block, {}, this->debug_info());
        // keepdims = true is passed, presumably so the result matches reduced_shape.
        auto& max_node =
            builder.add_library_node<MaxNode>(max_block, this->debug_info(), this->shape_, this->axes_, true);

        auto& in_access = builder.add_access(max_block, in_node.data());
        auto& out_access = builder.add_access(max_block, tmp_max_name);

        builder
            .add_computational_memlet(max_block, in_access, max_node, "X", {}, in_edge.base_type(), this->debug_info());
        builder
            .add_computational_memlet(max_block, max_node, "Y", out_access, {}, reduced_tensor_type, this->debug_info());
    }

    // 2. Sub(X, TmpMaxBcast) -> TmpSub
    {
        auto& sub_block = builder.add_block_before(parent, block, {}, this->debug_info());
        auto& sub_node = builder.add_library_node<SubNode>(sub_block, this->debug_info(), this->shape_);

        auto& in1_access = builder.add_access(sub_block, in_node.data());
        auto& in2_access = builder.add_access(sub_block, tmp_max_name);
        auto& out_access = builder.add_access(sub_block, tmp_sub_name);

        builder
            .add_computational_memlet(sub_block, in1_access, sub_node, "A", {}, in_edge.base_type(), this->debug_info());
        builder
            .add_computational_memlet(sub_block, in2_access, sub_node, "B", {}, broadcast_tensor_type, this->debug_info());
        builder
            .add_computational_memlet(sub_block, sub_node, "C", out_access, {}, in_edge.base_type(), this->debug_info());
    }

    // 3. Exp(TmpSub) -> TmpExp
    {
        auto& exp_block = builder.add_block_before(parent, block, {}, this->debug_info());
        auto& exp_node = builder.add_library_node<ExpNode>(exp_block, this->debug_info(), this->shape_);

        auto& in_access = builder.add_access(exp_block, tmp_sub_name);
        auto& out_access = builder.add_access(exp_block, tmp_exp_name);

        builder
            .add_computational_memlet(exp_block, in_access, exp_node, "X", {}, in_edge.base_type(), this->debug_info());
        builder
            .add_computational_memlet(exp_block, exp_node, "Y", out_access, {}, in_edge.base_type(), this->debug_info());
    }

    // 4. Sum(TmpExp) -> TmpSum
    {
        auto& sum_block = builder.add_block_before(parent, block, {}, this->debug_info());
        auto& sum_node =
            builder.add_library_node<SumNode>(sum_block, this->debug_info(), this->shape_, this->axes_, true);

        auto& in_access = builder.add_access(sum_block, tmp_exp_name);
        auto& out_access = builder.add_access(sum_block, tmp_sum_name);

        builder
            .add_computational_memlet(sum_block, in_access, sum_node, "X", {}, in_edge.base_type(), this->debug_info());
        builder
            .add_computational_memlet(sum_block, sum_node, "Y", out_access, {}, reduced_tensor_type, this->debug_info());
    }

    // 5. Div(TmpExp, TmpSum) -> Output
    {
        auto& div_block = builder.add_block_before(parent, block, {}, this->debug_info());
        auto& div_node = builder.add_library_node<DivNode>(div_block, this->debug_info(), this->shape_);

        auto& in1_access = builder.add_access(div_block, tmp_exp_name);
        auto& in2_access = builder.add_access(div_block, tmp_sum_name);
        auto& out_access = builder.add_access(div_block, out_node.data());

        builder
            .add_computational_memlet(div_block, in1_access, div_node, "A", {}, in_edge.base_type(), this->debug_info());
        builder
            .add_computational_memlet(div_block, in2_access, div_node, "B", {}, broadcast_tensor_type, this->debug_info());
        builder
            .add_computational_memlet(div_block, div_node, "C", out_access, {}, out_edge.base_type(), this->debug_info());
    }

    // Cleanup: edges first, then the nodes they connected. `*this` is removed
    // last, so no member of this node is accessed after that call.
    builder.remove_memlet(block, in_edge);
    builder.remove_memlet(block, out_edge);
    builder.remove_node(block, in_node);
    builder.remove_node(block, out_node);
    builder.remove_node(block, *this);

    // Drop the original block from its parent sequence.
    int last_index = parent.index(block);
    builder.remove_child(parent, last_index);

    return true;
}

// Creates a copy of this node under a new element id, vertex and parent graph.
// The debug info, shape and axes are taken from this node.
// NOTE(review): keepdims is not passed, so the constructor's default is used —
// confirm against the declaration in the header.
std::unique_ptr<data_flow::DataFlowNode> SoftmaxNode::
    clone(size_t element_id, const graph::Vertex vertex, data_flow::DataFlowGraph& parent) const {
    auto* copy = new SoftmaxNode(element_id, this->debug_info(), vertex, parent, this->shape_, this->axes_);
    return std::unique_ptr<data_flow::DataFlowNode>(copy);
}

} // namespace tensor
} // namespace math
} // namespace sdfg

1	#include "sdfg/data_flow/library_nodes/math/tensor/reduce_ops/softmax_node.h"
2	#include "sdfg/builder/structured_sdfg_builder.h"
3	#include "sdfg/data_flow/library_nodes/math/tensor/broadcast_node.h"
4	#include "sdfg/data_flow/library_nodes/math/tensor/elementwise_ops/div_node.h"
5	#include "sdfg/data_flow/library_nodes/math/tensor/elementwise_ops/exp_node.h"
6	#include "sdfg/data_flow/library_nodes/math/tensor/elementwise_ops/sub_node.h"
7	#include "sdfg/data_flow/library_nodes/math/tensor/reduce_ops/max_node.h"
8	#include "sdfg/data_flow/library_nodes/math/tensor/reduce_ops/sum_node.h"
9	#include "sdfg/data_flow/library_nodes/stdlib/malloc.h"
10	#include "sdfg/structured_control_flow/block.h"
11	#include "sdfg/structured_control_flow/for.h"
12	#include "sdfg/types/pointer.h"
13	#include "sdfg/types/scalar.h"
14	#include "sdfg/types/utils.h"
15
16	namespace sdfg {
17	namespace math {
18	namespace tensor {
19
20	SoftmaxNode::SoftmaxNode(
21	size_t element_id,
22	const DebugInfo& debug_info,
23	const graph::Vertex vertex,
24	data_flow::DataFlowGraph& parent,
25	const std::vector<symbolic::Expression>& shape,
26	const std::vector<int64_t>& axes,
27	bool keepdims
28	)
29	: ReduceNode(element_id, debug_info, vertex, parent, LibraryNodeType_Softmax, shape, axes, keepdims) {	×
30	if (keepdims) {	×
31	throw InvalidSDFGException("Unsupported attribute on library node: softmax");	×
32	}	×
33	}	×
34
35	void SoftmaxNode::validate(const Function& function) const {}	×
36
37	bool SoftmaxNode::expand(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {	×
38	auto& dataflow = this->get_parent();	×
39	auto& block = static_cast<structured_control_flow::Block&>(*dataflow.get_parent());	×
40
41	if (dataflow.in_degree(*this) != 1 || dataflow.out_degree(*this) != 1) {	×
42	return false;	×
43	}	×
44
NEW 45	auto& parent = static_cast<structured_control_flow::Sequence&>(*block.get_parent());	×
46
47	auto& in_edge = *dataflow.in_edges(*this).begin();	×
48	auto& out_edge = *dataflow.out_edges(*this).begin();	×
49	auto& in_node = static_cast<data_flow::AccessNode&>(in_edge.src());	×
50	auto& out_node = static_cast<data_flow::AccessNode&>(out_edge.dst());	×
51
52	// Calculate reduced shape (for Max and Sum)
53	std::vector<symbolic::Expression> reduced_shape;	×
54	std::vector<int64_t> sorted_axes = axes_;	×
55	// Normalize negative axes
56	for (auto& axis : sorted_axes) {	×
57	if (axis < 0) {	×
58	axis = static_cast<int64_t>(shape_.size()) + axis;	×
59	}	×
60	// Validate axis is in bounds
61	if (axis < 0 || axis >= static_cast<int64_t>(shape_.size())) {	×
62	throw InvalidSDFGException(	×
63	"Library Node: Axis value out of bounds. Axis: " + std::to_string(axis) +	×
64	" Shape size: " + std::to_string(shape_.size())	×
65	);	×
66	}	×
67	}	×
68	std::sort(sorted_axes.begin(), sorted_axes.end());	×
69
70	for (size_t i = 0; i < shape_.size(); ++i) {	×
71	bool is_axis = false;	×
72	for (auto axis : sorted_axes) {	×
73	if (axis == (int64_t) i) {	×
74	is_axis = true;	×
75	break;	×
76	}	×
77	}	×
78
79	if (is_axis) {	×
80	reduced_shape.push_back(symbolic::one());	×
81	} else {	×
82	reduced_shape.push_back(shape_[i]);	×
83	}	×
84	}	×
85
86	types::Scalar element_type(this->primitive_type(dataflow));	×
87	types::Pointer pointer_type(element_type);	×
88
89	// Type to store reduced results (e.g., max and sum)
90	types::Tensor reduced_tensor_type(element_type, reduced_shape);	×
91	// Type for broadcasted tensors (e.g., max and sum after broadcasting)
92	// Compute broadcast strides: use strides from reduced_tensor_type, but set to zero for reduced dimensions
93	symbolic::MultiExpression reduced_strides = reduced_tensor_type.strides();	×
94	symbolic::MultiExpression broadcast_strides;	×
95	for (size_t i = 0; i < shape_.size(); ++i) {	×
96	bool is_reduced = std::find(sorted_axes.begin(), sorted_axes.end(), static_cast<int64_t>(i)) !=	×
97	sorted_axes.end();	×
98	if (is_reduced) {	×
99	broadcast_strides.push_back(symbolic::zero());	×
100	} else {	×
101	broadcast_strides.push_back(reduced_strides[i]);	×
102	}	×
103	}	×
104	types::Tensor broadcast_tensor_type(element_type, shape_, broadcast_strides);	×
105
106	// Temporary buffers
107	std::string tmp_max_name = builder.find_new_name("_softmax_max");	×
108	builder.add_container(tmp_max_name, pointer_type);	×
109
110	std::string tmp_sub_name = builder.find_new_name("_softmax_sub");	×
111	builder.add_container(tmp_sub_name, pointer_type);	×
112
113	std::string tmp_exp_name = builder.find_new_name("_softmax_exp");	×
114	builder.add_container(tmp_exp_name, pointer_type);	×
115
116	std::string tmp_sum_name = builder.find_new_name("_softmax_sum");	×
117	builder.add_container(tmp_sum_name, pointer_type);	×
118
119	// Mallocs
120	{	×
121	symbolic::Expression bytes_elem = types::get_type_size(element_type, false);	×
122
123	symbolic::Expression bytes_full = bytes_elem;	×
124	for (auto& dim : this->shape_) {	×
125	bytes_full = symbolic::mul(dim, bytes_full);	×
126	}	×
127
128	symbolic::Expression bytes_reduced = bytes_elem;	×
129	for (auto& dim : reduced_shape) {	×
130	bytes_reduced = symbolic::mul(dim, bytes_reduced);	×
131	}	×
132
133	auto& alloc_block = builder.add_block_before(parent, block, {}, this->debug_info());	×
134
135	auto malloc_helper = [&](const std::string& name, const symbolic::Expression& size) {	×
136	auto& access = builder.add_access(alloc_block, name);	×
137	auto& malloc_node = builder.add_library_node<stdlib::MallocNode>(alloc_block, this->debug_info(), size);	×
138	builder	×
139	.add_computational_memlet(alloc_block, malloc_node, "_ret", access, {}, pointer_type, this->debug_info());	×
140	};	×
141
142	malloc_helper(tmp_max_name, bytes_reduced);	×
143	malloc_helper(tmp_sub_name, bytes_full);	×
144	malloc_helper(tmp_exp_name, bytes_full);	×
145	malloc_helper(tmp_sum_name, bytes_reduced);	×
146	}	×
147
148	// 1. Max(X) -> TmpMax
149	{	×
150	auto& max_block = builder.add_block_before(parent, block, {}, this->debug_info());	×
151	auto& max_node =	×
152	builder.add_library_node<MaxNode>(max_block, this->debug_info(), this->shape_, this->axes_, true);	×
153
154	auto& in_access = builder.add_access(max_block, in_node.data());	×
155	auto& out_access = builder.add_access(max_block, tmp_max_name);	×
156
157	builder	×
158	.add_computational_memlet(max_block, in_access, max_node, "X", {}, in_edge.base_type(), this->debug_info());	×
159	builder	×
160	.add_computational_memlet(max_block, max_node, "Y", out_access, {}, reduced_tensor_type, this->debug_info());	×
161	}	×
162
163	// 2. Sub(X, TmpMaxBcast) -> TmpSub
164	{	×
165	auto& sub_block = builder.add_block_before(parent, block, {}, this->debug_info());	×
166	auto& sub_node = builder.add_library_node<SubNode>(sub_block, this->debug_info(), this->shape_);	×
167
168	auto& in1_access = builder.add_access(sub_block, in_node.data());	×
169	auto& in2_access = builder.add_access(sub_block, tmp_max_name);	×
170	auto& out_access = builder.add_access(sub_block, tmp_sub_name);	×
171
172	builder	×
173	.add_computational_memlet(sub_block, in1_access, sub_node, "A", {}, in_edge.base_type(), this->debug_info());	×
174	builder	×
175	.add_computational_memlet(sub_block, in2_access, sub_node, "B", {}, broadcast_tensor_type, this->debug_info());	×
176	builder	×
177	.add_computational_memlet(sub_block, sub_node, "C", out_access, {}, in_edge.base_type(), this->debug_info());	×
178	}	×
179
180	// 3. Exp(TmpSub) -> TmpExp
181	{	×
182	auto& exp_block = builder.add_block_before(parent, block, {}, this->debug_info());	×
183	auto& exp_node = builder.add_library_node<ExpNode>(exp_block, this->debug_info(), this->shape_);	×
184
185	auto& in_access = builder.add_access(exp_block, tmp_sub_name);	×
186	auto& out_access = builder.add_access(exp_block, tmp_exp_name);	×
187
188	builder	×
189	.add_computational_memlet(exp_block, in_access, exp_node, "X", {}, in_edge.base_type(), this->debug_info());	×
190	builder	×
191	.add_computational_memlet(exp_block, exp_node, "Y", out_access, {}, in_edge.base_type(), this->debug_info());	×
192	}	×
193
194	// 4. Sum(TmpExp) -> TmpSum
195	{	×
196	auto& sum_block = builder.add_block_before(parent, block, {}, this->debug_info());	×
197	auto& sum_node =	×
198	builder.add_library_node<SumNode>(sum_block, this->debug_info(), this->shape_, this->axes_, true);	×
199
200	auto& in_access = builder.add_access(sum_block, tmp_exp_name);	×
201	auto& out_access = builder.add_access(sum_block, tmp_sum_name);	×
202
203	builder	×
204	.add_computational_memlet(sum_block, in_access, sum_node, "X", {}, in_edge.base_type(), this->debug_info());	×
205	builder	×
206	.add_computational_memlet(sum_block, sum_node, "Y", out_access, {}, reduced_tensor_type, this->debug_info());	×
207	}	×
208
209	// 5. Div(TmpExp, TmpSum) -> Output
210	{	×
211	auto& div_block = builder.add_block_before(parent, block, {}, this->debug_info());	×
212	auto& div_node = builder.add_library_node<DivNode>(div_block, this->debug_info(), this->shape_);	×
213
214	auto& in1_access = builder.add_access(div_block, tmp_exp_name);	×
215	auto& in2_access = builder.add_access(div_block, tmp_sum_name);	×
216	auto& out_access = builder.add_access(div_block, out_node.data());	×
217
218	builder	×
219	.add_computational_memlet(div_block, in1_access, div_node, "A", {}, in_edge.base_type(), this->debug_info());	×
220	builder	×
221	.add_computational_memlet(div_block, in2_access, div_node, "B", {}, broadcast_tensor_type, this->debug_info());	×
222	builder	×
223	.add_computational_memlet(div_block, div_node, "C", out_access, {}, out_edge.base_type(), this->debug_info());	×
224	}	×
225
226	// Cleanup
227	builder.remove_memlet(block, in_edge);	×
228	builder.remove_memlet(block, out_edge);	×
229	builder.remove_node(block, in_node);	×
230	builder.remove_node(block, out_node);	×
231	builder.remove_node(block, *this);	×
232
233	int last_index = parent.index(block);	×
234	builder.remove_child(parent, last_index);	×
235
236	return true;	×
237	}	×
238
239	std::unique_ptr<data_flow::DataFlowNode> SoftmaxNode::
240	clone(size_t element_id, const graph::Vertex vertex, data_flow::DataFlowGraph& parent) const {	×
241	return std::unique_ptr<	×
242	data_flow::DataFlowNode>(new SoftmaxNode(element_id, this->debug_info(), vertex, parent, this->shape_, this->axes_)	×
243	);	×
244	}	×
245
246	} // namespace tensor
247	} // namespace math
248	} // namespace sdfg

daisytuner / docc / 27330290888

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous