• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / sdfglib / 17651658650

11 Sep 2025 04:58PM UTC coverage: 61.012% (+1.3%) from 59.755%
17651658650

Pull #219

github

web-flow
Merge 742a12367 into f744ac9f5
Pull Request #219: stdlib Library Nodes and ConstantNodes

499 of 1681 new or added lines in 81 files covered. (29.68%)

95 existing lines in 36 files now uncovered.

9718 of 15928 relevant lines covered (61.01%)

108.0 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

59.75
/src/data_flow/library_nodes/math/ml/conv.cpp
1
#include "sdfg/data_flow/library_nodes/math/ml/conv.h"
2

3
#include "sdfg/analysis/analysis.h"
4
#include "sdfg/builder/structured_sdfg_builder.h"
5

6
#include "sdfg/analysis/scope_analysis.h"
7
#include "sdfg/debug_info.h"
8

9
namespace sdfg {
10
namespace math {
11
namespace ml {
12

13
ConvNode::ConvNode(
2✔
14
    size_t element_id,
15
    const DebugInfoRegion& debug_info,
16
    const graph::Vertex vertex,
17
    data_flow::DataFlowGraph& parent,
18
    const std::vector<symbolic::Expression> shape,
19
    bool has_bias,
20
    std::vector<size_t> dilations,
21
    std::vector<size_t> kernel_shape,
22
    std::vector<size_t> pads,
23
    std::vector<size_t> strides
24
)
25
    : MathNode(
2✔
26
          element_id,
2✔
27
          debug_info,
2✔
28
          vertex,
2✔
29
          parent,
2✔
30
          LibraryNodeType_Conv,
31
          {"Y"},
2✔
32
          {"X", "W"},
2✔
33
          data_flow::ImplementationType_NONE
34
      ),
35
      shape_(shape), has_bias_(has_bias), dilations_(dilations), kernel_shape_(kernel_shape), pads_(pads),
2✔
36
      strides_(strides) {
4✔
37
    if (has_bias_) {
2✔
38
        this->inputs_.push_back("B");
×
39
    }
×
40
}
2✔
41

NEW
42
// Read-only view of the output tensor shape (one expression per dimension).
const std::vector<symbolic::Expression>& ConvNode::shape() const {
    return shape_;
}
43

NEW
44
// Collects every symbol referenced by the stored shape expressions.
symbolic::SymbolSet ConvNode::symbols() const {
    symbolic::SymbolSet result;
    for (const auto& extent : shape_) {
        for (auto& sym : symbolic::atoms(extent)) {
            result.insert(sym);
        }
    }
    return result;
}
53

NEW
54
// Substitutes `old_expression` with `new_expression` in every shape dimension.
void ConvNode::replace(const symbolic::Expression& old_expression, const symbolic::Expression& new_expression) {
    for (size_t i = 0; i < shape_.size(); ++i) {
        shape_[i] = symbolic::subs(shape_[i], old_expression, new_expression);
    }
}
59

NEW
60
// Structural validation hook for Conv nodes.
// NOTE(review): intentionally a no-op today — nothing about the node
// configuration (e.g. dilations/pads/strides sizes vs. kernel_shape) is
// checked yet; consider adding consistency checks here.
void ConvNode::validate(const Function& function) const {}
61

UNCOV
62
// Whether this convolution carries the optional bias input connector "B".
bool ConvNode::has_bias() const {
    return has_bias_;
}
63

64
// Per-spatial-dimension dilation factors (returned by value).
std::vector<size_t> ConvNode::dilations() const {
    return dilations_;
}
65

66
// Spatial kernel extents (returned by value).
std::vector<size_t> ConvNode::kernel_shape() const {
    return kernel_shape_;
}
67

68
// Per-spatial-dimension padding (returned by value).
std::vector<size_t> ConvNode::pads() const {
    return pads_;
}
69

70
// Per-spatial-dimension strides (returned by value).
std::vector<size_t> ConvNode::strides() const {
    return strides_;
}
71

72
bool ConvNode::expand(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
2✔
73
    auto& sdfg = builder.subject();
2✔
74
    auto& dataflow = this->get_parent();
2✔
75
    auto& block = static_cast<structured_control_flow::Block&>(*dataflow.get_parent());
2✔
76

77
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
2✔
78
    auto& parent = static_cast<structured_control_flow::Sequence&>(*scope_analysis.parent_scope(&block));
2✔
79
    int index = parent.index(block);
2✔
80
    auto& transition = parent.at(index).second;
2✔
81

82
    const data_flow::Memlet* iedge_X = nullptr;
2✔
83
    const data_flow::Memlet* iedge_W = nullptr;
2✔
84
    const data_flow::Memlet* iedge_B = nullptr;
2✔
85
    for (const auto& iedge : dataflow.in_edges(*this)) {
6✔
86
        if (iedge.dst_conn() == "X") {
4✔
87
            iedge_X = &iedge;
2✔
88
        } else if (iedge.dst_conn() == "W") {
4✔
89
            iedge_W = &iedge;
2✔
90
        } else if (iedge.dst_conn() == "B") {
2✔
91
            iedge_B = &iedge;
×
92
        }
×
93
    }
94

95
    const data_flow::Memlet* oedge_Y = nullptr;
2✔
96
    for (const auto& oedge : dataflow.out_edges(*this)) {
2✔
97
        if (oedge.src_conn() == "Y") {
2✔
98
            oedge_Y = &oedge;
2✔
99
            break;
2✔
100
        }
101
    }
102

103
    // Find names of input and output containers
104
    std::string X_name = static_cast<const data_flow::AccessNode&>(iedge_X->src()).data();
2✔
105
    std::string W_name = static_cast<const data_flow::AccessNode&>(iedge_W->src()).data();
2✔
106
    std::string Y_name = static_cast<const data_flow::AccessNode&>(oedge_Y->dst()).data();
2✔
107

108
    auto debug_info = builder.debug_info().get_region(block.debug_info().indices());
2✔
109
    auto& new_sequence = builder.add_sequence_before(parent, block, transition.assignments(), debug_info);
2✔
110
    structured_control_flow::Sequence* last_scope = &new_sequence;
2✔
111

112
    /************************
113
     * Parallel dimensions *
114
     ************************/
115
    // Generate one Map per parallel dimension of the output tensor (Y).
116
    data_flow::Subset out_subset;
2✔
117
    std::vector<symbolic::Expression> parallel_syms;
2✔
118
    structured_control_flow::Map* last_map = nullptr;
2✔
119
    for (size_t dim = 0; dim < this->shape_.size(); ++dim) {
10✔
120
        std::string indvar_str = builder.find_new_name("_i");
8✔
121
        builder.add_container(indvar_str, types::Scalar(types::PrimitiveType::UInt64));
8✔
122

123
        auto indvar = symbolic::symbol(indvar_str);
8✔
124
        auto init = symbolic::zero();
8✔
125
        auto update = symbolic::add(indvar, symbolic::one());
8✔
126
        auto condition = symbolic::Lt(indvar, this->shape_[dim]);
8✔
127

128
        last_map = &builder.add_map(
16✔
129
            *last_scope,
8✔
130
            indvar,
131
            condition,
132
            init,
8✔
133
            update,
134
            structured_control_flow::ScheduleType_Sequential::create(),
8✔
135
            {},
8✔
136
            debug_info
137
        );
138
        last_scope = &last_map->root();
8✔
139
        out_subset.push_back(indvar);
8✔
140
        parallel_syms.push_back(indvar);
8✔
141
    }
8✔
142

143
    /************************
144
     * Reduction dimensions *
145
     ************************/
146
    // For convolution, we reduce over input channels and kernel dimensions.
147
    // Assuming weight tensor layout (M, C, k1, k2, ...), skip the first dim (output channels).
148
    std::vector<symbolic::Expression> reduction_syms;
2✔
149
    structured_control_flow::For* last_for = nullptr;
2✔
150
    for (size_t dim = 0; dim < this->kernel_shape_.size(); ++dim) {
6✔
151
        std::string indvar_str = builder.find_new_name("_r");
4✔
152
        builder.add_container(indvar_str, types::Scalar(types::PrimitiveType::UInt64));
4✔
153

154
        auto indvar = symbolic::symbol(indvar_str);
4✔
155
        auto init = symbolic::zero();
4✔
156
        auto update = symbolic::add(indvar, symbolic::one());
4✔
157
        auto condition = symbolic::Lt(indvar, symbolic::integer(this->kernel_shape_[dim]));
4✔
158

159
        last_for = &builder.add_for(*last_scope, indvar, condition, init, update, {}, debug_info);
4✔
160
        last_scope = &last_for->root();
4✔
161
        reduction_syms.push_back(indvar);
4✔
162
    }
4✔
163

164
    // Add innermost code block – convolution computation.
165
    auto& code_block = builder.add_block(*last_scope, {}, debug_info);
2✔
166

167
    // Reuse debug infos from original access nodes (if available).
168
    const DebugInfos& dbg_X = builder.debug_info().get_region(iedge_X->src().debug_info().indices());
2✔
169
    const DebugInfos& dbg_W = builder.debug_info().get_region(iedge_W->src().debug_info().indices());
2✔
170
    const DebugInfos& dbg_Y = builder.debug_info().get_region(oedge_Y->dst().debug_info().indices());
2✔
171
    const DebugInfos& dbg_B = (iedge_B != nullptr)
2✔
172
                                  ? builder.debug_info().get_region(iedge_B->src().debug_info().indices())
×
173
                                  : DebugInfos();
2✔
174

175
    // Create new access nodes inside the innermost block.
176
    auto& X_acc = builder.add_access(code_block, X_name, dbg_X);
2✔
177
    auto& W_acc = builder.add_access(code_block, W_name, dbg_W);
2✔
178
    auto& Y_acc_in = builder.add_access(code_block, Y_name, dbg_Y);
2✔
179
    auto& Y_acc_out = builder.add_access(code_block, Y_name, dbg_Y);
2✔
180
    // Bias handled after reduction loops; no need to access B inside the reduction tasklet.
181

182
    /********************
183
     * Build subsets    *
184
     ********************/
185
    // Helper lambdas to safely fetch stride/dilation/pad values.
186
    auto int_expr = [](size_t v) { return symbolic::integer(static_cast<int64_t>(v)); };
14✔
187

188
    auto get_stride = [&](size_t idx) -> symbolic::Expression {
6✔
189
        if (idx < strides_.size()) {
4✔
190
            return int_expr(strides_[idx]);
4✔
191
        }
192
        return symbolic::one();
×
193
    };
4✔
194

195
    auto get_dilation = [&](size_t idx) -> symbolic::Expression {
6✔
196
        if (idx < dilations_.size()) {
4✔
197
            return int_expr(dilations_[idx]);
4✔
198
        }
199
        return symbolic::one();
×
200
    };
4✔
201

202
    auto get_pad = [&](size_t idx) -> symbolic::Expression {
6✔
203
        if (idx < pads_.size()) {
4✔
204
            return int_expr(pads_[idx]);
4✔
205
        }
206
        return symbolic::zero();
×
207
    };
4✔
208

209
    const size_t spatial_dims = kernel_shape_.size();
2✔
210

211
    // Extract commonly-used indices.
212
    auto get_parallel_sym = [&](size_t idx) -> symbolic::Expression {
10✔
213
        if (idx < parallel_syms.size()) return parallel_syms[idx];
8✔
214
        return symbolic::zero();
×
215
    };
8✔
216

217
    auto get_reduction_sym = [&](size_t idx) -> symbolic::Expression {
12✔
218
        if (idx < reduction_syms.size()) return reduction_syms[idx];
10✔
219
        return symbolic::zero();
4✔
220
    };
10✔
221

222
    auto N_idx = get_parallel_sym(0);
2✔
223
    auto M_idx = get_parallel_sym(1);
2✔
224

225
    // Input channel and kernel indices come from reduction variables.
226
    auto C_idx = get_reduction_sym(0);
2✔
227

228
    // Build X subset.
229
    data_flow::Subset subset_X;
2✔
230
    subset_X.push_back(N_idx); // Batch dim
2✔
231
    subset_X.push_back(C_idx); // Input channel dim
2✔
232
    for (size_t d = 0; d < spatial_dims; ++d) {
6✔
233
        symbolic::Expression out_d = get_parallel_sym(2 + d);
4✔
234
        symbolic::Expression ker_d = get_reduction_sym(1 + d);
4✔
235

236
        auto in_d = symbolic::
4✔
237
            sub(symbolic::add(symbolic::mul(out_d, get_stride(d)), symbolic::mul(ker_d, get_dilation(d))), get_pad(d));
4✔
238
        subset_X.push_back(in_d);
4✔
239
    }
4✔
240

241
    // Build W subset.
242
    data_flow::Subset subset_W;
2✔
243
    subset_W.push_back(M_idx); // Output channel (filter)
2✔
244
    subset_W.push_back(C_idx); // Input channel
2✔
245
    for (size_t d = 0; d < spatial_dims; ++d) {
6✔
246
        symbolic::Expression ker_d = get_reduction_sym(1 + d);
4✔
247
        subset_W.push_back(ker_d);
4✔
248
    }
4✔
249

250
    // Output Y subset is simply the parallel indices computed earlier.
251
    data_flow::Subset subset_Y = out_subset;
2✔
252

253
    // Bias subset (only along output channels).
254
    data_flow::Subset subset_B;
2✔
255
    if (has_bias_) {
2✔
256
        subset_B.push_back(M_idx);
×
257
    }
×
258

259
    /************************
260
     * Add computation node *
261
     ************************/
262
    // Create tasklet performing fused-multiply-add: _out = _x * _w + _y
263
    if (has_bias_) {
2✔
264
        // Bias will be added after reduction, so no change here.
265
    }
×
266

267
    auto& tasklet =
2✔
268
        builder.add_tasklet(code_block, data_flow::TaskletCode::fma, "_out", {"_x", "_w", "_y"}, debug_info);
2✔
269

270
    // Connect memlets.
271
    builder.add_computational_memlet(code_block, X_acc, tasklet, "_x", subset_X, iedge_X->base_type(), debug_info);
2✔
272
    builder.add_computational_memlet(code_block, W_acc, tasklet, "_w", subset_W, iedge_W->base_type(), debug_info);
2✔
273
    builder.add_computational_memlet(code_block, Y_acc_in, tasklet, "_y", subset_Y, oedge_Y->base_type(), debug_info);
2✔
274
    builder.add_computational_memlet(code_block, tasklet, "_out", Y_acc_out, subset_Y, oedge_Y->base_type(), debug_info);
2✔
275

276
    // Bias: add once per output element outside reduction loops.
277
    if (has_bias_) {
2✔
278
        // Insert after the reduction loops (i.e., right after they finish).
279
        // We add a single tasklet in the parent scope (last_map root).
280
        std::string B_name = static_cast<const data_flow::AccessNode&>(iedge_B->src()).data();
×
NEW
281
        auto& bias_block = builder.add_block(last_map->root(), {}, debug_info);
×
282
        auto& B_acc_local = builder.add_access(bias_block, B_name, dbg_B);
×
283
        auto& Y_acc2_in = builder.add_access(bias_block, Y_name, dbg_Y);
×
284
        auto& Y_acc2_out = builder.add_access(bias_block, Y_name, dbg_Y);
×
285

NEW
286
        auto& bias_tasklet =
×
NEW
287
            builder.add_tasklet(bias_block, data_flow::TaskletCode::add, "_out", {"_bias", "_y"}, debug_info);
×
288
        builder.add_computational_memlet(
×
NEW
289
            bias_block, B_acc_local, bias_tasklet, "_bias", subset_B, iedge_B->base_type(), debug_info
×
290
        );
291
        builder.add_computational_memlet(
×
NEW
292
            bias_block, Y_acc2_in, bias_tasklet, "_y", subset_Y, oedge_Y->base_type(), debug_info
×
293
        );
294
        builder.add_computational_memlet(
×
NEW
295
            bias_block, bias_tasklet, "_out", Y_acc2_out, subset_Y, oedge_Y->base_type(), debug_info
×
296
        );
297
    }
×
298

299
    // Clean up block
300
    builder.remove_memlet(block, *iedge_X);
2✔
301
    builder.remove_memlet(block, *iedge_W);
2✔
302
    if (iedge_B != nullptr) {
2✔
303
        builder.remove_memlet(block, *iedge_B);
×
304
    }
×
305
    builder.remove_memlet(block, *oedge_Y);
2✔
306
    builder.remove_node(block, *this);
2✔
307
    builder.remove_child(parent, index + 1);
2✔
308

309
    return true;
310
}
2✔
311

312
std::unique_ptr<data_flow::DataFlowNode> ConvNode::
313
    clone(size_t element_id, const graph::Vertex vertex, data_flow::DataFlowGraph& parent) const {
×
314
    return std::unique_ptr<data_flow::DataFlowNode>(new ConvNode(
×
315
        element_id,
×
316
        this->debug_info(),
×
317
        vertex,
×
318
        parent,
×
NEW
319
        this->shape_,
×
320
        this->has_bias_,
×
321
        this->dilations_,
×
322
        this->kernel_shape_,
×
323
        this->pads_,
×
324
        this->strides_
×
325
    ));
326
}
×
327

328
// Flattens a Conv node's configuration into a JSON object; shape expressions
// are serialized individually through the symbolic JSON serializer.
nlohmann::json ConvNodeSerializer::serialize(const data_flow::LibraryNode& library_node) {
    auto& conv = static_cast<const ConvNode&>(library_node);

    nlohmann::json doc;
    doc["code"] = conv.code().value();
    doc["outputs"] = conv.outputs();
    doc["inputs"] = conv.inputs();
    doc["has_bias"] = conv.has_bias();
    doc["dilations"] = conv.dilations();
    doc["kernel_shape"] = conv.kernel_shape();
    doc["pads"] = conv.pads();
    doc["strides"] = conv.strides();

    serializer::JSONSerializer expr_serializer;
    auto shape_json = nlohmann::json::array();
    for (const auto& dim : conv.shape()) {
        shape_json.push_back(expr_serializer.expression(dim));
    }
    doc["shape"] = shape_json;

    return doc;
}
349

350
/// Reconstructs a Conv library node from its JSON representation.
///
/// @param j       Serialized node (must carry the Conv code).
/// @param builder Builder that owns the target SDFG.
/// @param parent  Block the node is inserted into.
/// @return Reference to the newly added library node.
/// @throws std::runtime_error if the payload's code is not Conv;
///         nlohmann::json::exception if a required field is missing/mistyped.
data_flow::LibraryNode& ConvNodeSerializer::deserialize(
    const nlohmann::json& j, builder::StructuredSDFGBuilder& builder, structured_control_flow::Block& parent
) {
    auto code = j["code"].get<std::string>();
    if (code != LibraryNodeType_Conv.value()) {
        throw std::runtime_error("Invalid library node code");
    }

    sdfg::serializer::JSONSerializer serializer;
    DebugInfoRegion debug_info = serializer.json_to_debug_info_region(j["debug_info"], builder.debug_info());

    // Presence/type validation only: connector lists are rebuilt by the
    // ConvNode constructor, so the decoded values are intentionally discarded.
    (void) j.at("outputs").get<std::vector<std::string>>();
    (void) j.at("inputs").get<std::vector<std::string>>();

    auto has_bias = j.at("has_bias").get<bool>();
    auto dilations = j.at("dilations").get<std::vector<size_t>>();
    auto kernel_shape = j.at("kernel_shape").get<std::vector<size_t>>();
    auto pads = j.at("pads").get<std::vector<size_t>>();
    auto strides = j.at("strides").get<std::vector<size_t>>();

    // Use at() rather than operator[]: on a const json, operator[] with a
    // missing key is undefined behavior, while at() throws.
    // NOTE(review): shape entries are re-parsed directly via SymEngine from
    // the serialized strings — confirm this round-trips with
    // JSONSerializer::expression() used in serialize().
    std::vector<symbolic::Expression> shape;
    for (const auto& dim : j.at("shape")) {
        shape.push_back(SymEngine::Expression(dim.get<std::string>()));
    }

    return builder
        .add_library_node<ConvNode>(parent, debug_info, shape, has_bias, dilations, kernel_shape, pads, strides);
}
377

378
} // namespace ml
379
} // namespace math
380
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc