• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / sdfglib / 17637380013

11 Sep 2025 07:29AM UTC coverage: 59.755% (+0.6%) from 59.145%
17637380013

push

github

web-flow
New debug info (#210)

* initial draft

* update data structure and construction logic

* finalize DebugInfo draft

* fix tests

* Update serializer and fix tests

* fix append bug

* update data structure

* sdfg builder update

* const ref vectors

* update implementation and partial tests

* compiling state

* update serializer interface

* update dot test

* reset interface to debug_info in json to maintain compatibility with tools

* first review batch

* second batch of changes

* merge fixes

777 of 1111 new or added lines in 46 files covered. (69.94%)

11 existing lines in 11 files now uncovered.

9755 of 16325 relevant lines covered (59.75%)

115.06 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

65.36
/src/data_flow/library_nodes/math/ml/conv.cpp
1
#include "sdfg/data_flow/library_nodes/math/ml/conv.h"

#include <cstddef>
#include <cstdint>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "sdfg/analysis/analysis.h"
#include "sdfg/analysis/scope_analysis.h"
#include "sdfg/builder/structured_sdfg_builder.h"
#include "sdfg/debug_info.h"
8

9
namespace sdfg {
10
namespace math {
11
namespace ml {
12

13
ConvNode::ConvNode(
2✔
14
    size_t element_id,
15
    const DebugInfoRegion& debug_info,
16
    const graph::Vertex vertex,
17
    data_flow::DataFlowGraph& parent,
18
    bool has_bias,
19
    std::vector<size_t> dilations,
20
    std::vector<size_t> kernel_shape,
21
    std::vector<size_t> pads,
22
    std::vector<size_t> strides
23
)
24
    : MathNode(
2✔
25
          element_id,
2✔
26
          debug_info,
2✔
27
          vertex,
2✔
28
          parent,
2✔
29
          LibraryNodeType_Conv,
30
          {"Y"},
2✔
31
          {"X", "W"},
2✔
32
          data_flow::ImplementationType_NONE
33
      ),
34
      has_bias_(has_bias), dilations_(dilations), kernel_shape_(kernel_shape), pads_(pads), strides_(strides) {
2✔
35
    if (has_bias_) {
2✔
36
        this->inputs_.push_back("B");
×
37
    }
×
38
}
2✔
39

40
// Validates this node's configuration against the enclosing function.
// Currently a no-op: no structural or attribute checks are performed yet.
void ConvNode::validate(const Function& function) const {
    // TODO: Implement
}
43

44
bool ConvNode::has_bias() const { return has_bias_; }
×
45

46
std::vector<size_t> ConvNode::dilations() const { return dilations_; }
×
47

48
std::vector<size_t> ConvNode::kernel_shape() const { return kernel_shape_; }
×
49

50
std::vector<size_t> ConvNode::pads() const { return pads_; }
×
51

52
std::vector<size_t> ConvNode::strides() const { return strides_; }
×
53

54
bool ConvNode::expand(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
2✔
55
    auto& sdfg = builder.subject();
2✔
56
    auto& dataflow = this->get_parent();
2✔
57
    auto& block = static_cast<structured_control_flow::Block&>(*dataflow.get_parent());
2✔
58

59
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
2✔
60
    auto& parent = static_cast<structured_control_flow::Sequence&>(*scope_analysis.parent_scope(&block));
2✔
61
    int index = parent.index(block);
2✔
62
    auto& transition = parent.at(index).second;
2✔
63

64
    const data_flow::Memlet* iedge_X = nullptr;
2✔
65
    const data_flow::Memlet* iedge_W = nullptr;
2✔
66
    const data_flow::Memlet* iedge_B = nullptr;
2✔
67
    for (const auto& iedge : dataflow.in_edges(*this)) {
6✔
68
        if (iedge.dst_conn() == "X") {
4✔
69
            iedge_X = &iedge;
2✔
70
        } else if (iedge.dst_conn() == "W") {
4✔
71
            iedge_W = &iedge;
2✔
72
        } else if (iedge.dst_conn() == "B") {
2✔
73
            iedge_B = &iedge;
×
74
        }
×
75
    }
76

77
    const data_flow::Memlet* oedge_Y = nullptr;
2✔
78
    for (const auto& oedge : dataflow.out_edges(*this)) {
2✔
79
        if (oedge.src_conn() == "Y") {
2✔
80
            oedge_Y = &oedge;
2✔
81
            break;
2✔
82
        }
83
    }
84

85
    // Find names of input and output containers
86
    std::string X_name = static_cast<const data_flow::AccessNode&>(iedge_X->src()).data();
2✔
87
    std::string W_name = static_cast<const data_flow::AccessNode&>(iedge_W->src()).data();
2✔
88
    std::string Y_name = static_cast<const data_flow::AccessNode&>(oedge_Y->dst()).data();
2✔
89

90
    data_flow::Subset dims_X = iedge_X->end_subset();
2✔
91
    data_flow::Subset dims_W = iedge_W->end_subset();
2✔
92
    data_flow::Subset dims_B;
2✔
93
    if (iedge_B != nullptr) {
2✔
94
        dims_B = iedge_B->end_subset();
×
95
    }
×
96
    data_flow::Subset dims_Y = oedge_Y->end_subset();
2✔
97

98
    auto& new_sequence = builder.add_sequence_before(
4✔
99
        parent, block, transition.assignments(), builder.debug_info().get_region(block.debug_info().indices())
2✔
100
    );
101
    structured_control_flow::Sequence* last_scope = &new_sequence;
2✔
102

103
    /************************
104
     * Parallel dimensions *
105
     ************************/
106
    // Generate one Map per parallel dimension of the output tensor (Y).
107
    const auto& begin_Y_subset = oedge_Y->begin_subset();
2✔
108
    const auto& end_Y_subset = oedge_Y->end_subset();
2✔
109

110
    data_flow::Subset out_subset;
2✔
111
    std::vector<symbolic::Expression> parallel_syms;
2✔
112
    structured_control_flow::Map* last_map = nullptr;
2✔
113
    for (size_t dim = 0; dim < begin_Y_subset.size(); ++dim) {
10✔
114
        const auto& dim_begin = begin_Y_subset[dim];
8✔
115
        const auto& dim_end = end_Y_subset[dim];
8✔
116

117
        std::string indvar_str = builder.find_new_name("_i");
8✔
118
        builder.add_container(indvar_str, types::Scalar(types::PrimitiveType::UInt64));
8✔
119

120
        auto indvar = symbolic::symbol(indvar_str);
8✔
121
        auto init = dim_begin;
8✔
122
        auto update = symbolic::add(indvar, symbolic::one());
8✔
123
        auto condition = symbolic::Lt(indvar, symbolic::add(dim_end, symbolic::one()));
8✔
124

125
        last_map = &builder.add_map(
16✔
126
            *last_scope,
8✔
127
            indvar,
128
            condition,
129
            init,
130
            update,
131
            structured_control_flow::ScheduleType_Sequential::create(),
8✔
132
            {},
8✔
133
            builder.debug_info().get_region(block.debug_info().indices())
8✔
134
        );
135
        last_scope = &last_map->root();
8✔
136
        out_subset.push_back(indvar);
8✔
137
        parallel_syms.push_back(indvar);
8✔
138
    }
8✔
139

140
    /************************
141
     * Reduction dimensions *
142
     ************************/
143
    // For convolution, we reduce over input channels and kernel dimensions.
144
    // Assuming weight tensor layout (M, C, k1, k2, ...), skip the first dim (output channels).
145
    const auto& begin_W_subset = iedge_W->begin_subset();
2✔
146
    const auto& end_W_subset = iedge_W->end_subset();
2✔
147

148
    std::vector<symbolic::Expression> reduction_syms;
2✔
149
    structured_control_flow::For* last_for = nullptr;
2✔
150
    for (size_t dim = 1; dim < begin_W_subset.size(); ++dim) {
8✔
151
        const auto& dim_begin = begin_W_subset[dim];
6✔
152
        const auto& dim_end = end_W_subset[dim];
6✔
153

154
        std::string indvar_str = builder.find_new_name("_r");
6✔
155
        builder.add_container(indvar_str, types::Scalar(types::PrimitiveType::UInt64));
6✔
156

157
        auto indvar = symbolic::symbol(indvar_str);
6✔
158
        auto init = dim_begin;
6✔
159
        auto update = symbolic::add(indvar, symbolic::one());
6✔
160
        auto condition = symbolic::Lt(indvar, symbolic::add(dim_end, symbolic::one()));
6✔
161

162
        last_for = &builder.add_for(
12✔
163
            *last_scope,
6✔
164
            indvar,
165
            condition,
166
            init,
167
            update,
168
            {},
6✔
169
            builder.debug_info().get_region(block.debug_info().indices())
6✔
170
        );
171
        last_scope = &last_for->root();
6✔
172
        reduction_syms.push_back(indvar);
6✔
173
    }
6✔
174

175
    // Add innermost code block – convolution computation.
176
    auto& code_block =
2✔
177
        builder.add_block(*last_scope, {}, builder.debug_info().get_region(block.debug_info().indices()));
2✔
178

179
    // Reuse debug infos from original access nodes (if available).
180
    const DebugInfos& dbg_X = builder.debug_info().get_region(iedge_X->src().debug_info().indices());
2✔
181
    const DebugInfos& dbg_W = builder.debug_info().get_region(iedge_W->src().debug_info().indices());
2✔
182
    const DebugInfos& dbg_Y = builder.debug_info().get_region(oedge_Y->dst().debug_info().indices());
2✔
183
    const DebugInfos& dbg_B = (iedge_B != nullptr)
2✔
NEW
184
                                  ? builder.debug_info().get_region(iedge_B->src().debug_info().indices())
×
185
                                  : DebugInfos();
2✔
186

187
    // Create new access nodes inside the innermost block.
188
    auto& X_acc = builder.add_access(code_block, X_name, dbg_X);
2✔
189
    auto& W_acc = builder.add_access(code_block, W_name, dbg_W);
2✔
190
    auto& Y_acc_in = builder.add_access(code_block, Y_name, dbg_Y);
2✔
191
    auto& Y_acc_out = builder.add_access(code_block, Y_name, dbg_Y);
2✔
192
    // Bias handled after reduction loops; no need to access B inside the reduction tasklet.
193

194
    /********************
195
     * Build subsets    *
196
     ********************/
197
    // Helper lambdas to safely fetch stride/dilation/pad values.
198
    auto int_expr = [](size_t v) { return symbolic::integer(static_cast<int64_t>(v)); };
14✔
199

200
    auto get_stride = [&](size_t idx) -> symbolic::Expression {
6✔
201
        if (idx < strides_.size()) {
4✔
202
            return int_expr(strides_[idx]);
4✔
203
        }
204
        return symbolic::one();
×
205
    };
4✔
206

207
    auto get_dilation = [&](size_t idx) -> symbolic::Expression {
6✔
208
        if (idx < dilations_.size()) {
4✔
209
            return int_expr(dilations_[idx]);
4✔
210
        }
211
        return symbolic::one();
×
212
    };
4✔
213

214
    auto get_pad = [&](size_t idx) -> symbolic::Expression {
6✔
215
        if (idx < pads_.size()) {
4✔
216
            return int_expr(pads_[idx]);
4✔
217
        }
218
        return symbolic::zero();
×
219
    };
4✔
220

221
    const size_t spatial_dims = kernel_shape_.size();
2✔
222

223
    // Extract commonly-used indices.
224
    auto get_parallel_sym = [&](size_t idx) -> symbolic::Expression {
10✔
225
        if (idx < parallel_syms.size()) return parallel_syms[idx];
8✔
226
        return symbolic::zero();
×
227
    };
8✔
228

229
    auto get_reduction_sym = [&](size_t idx) -> symbolic::Expression {
12✔
230
        if (idx < reduction_syms.size()) return reduction_syms[idx];
10✔
231
        return symbolic::zero();
×
232
    };
10✔
233

234
    auto N_idx = get_parallel_sym(0);
2✔
235
    auto M_idx = get_parallel_sym(1);
2✔
236

237
    // Input channel and kernel indices come from reduction variables.
238
    auto C_idx = get_reduction_sym(0);
2✔
239

240
    // Build X subset.
241
    data_flow::Subset subset_X;
2✔
242
    subset_X.push_back(N_idx); // Batch dim
2✔
243
    subset_X.push_back(C_idx); // Input channel dim
2✔
244
    for (size_t d = 0; d < spatial_dims; ++d) {
6✔
245
        symbolic::Expression out_d = get_parallel_sym(2 + d);
4✔
246
        symbolic::Expression ker_d = get_reduction_sym(1 + d);
4✔
247

248
        auto in_d = symbolic::
4✔
249
            sub(symbolic::add(symbolic::mul(out_d, get_stride(d)), symbolic::mul(ker_d, get_dilation(d))), get_pad(d));
4✔
250
        subset_X.push_back(in_d);
4✔
251
    }
4✔
252

253
    // Build W subset.
254
    data_flow::Subset subset_W;
2✔
255
    subset_W.push_back(M_idx); // Output channel (filter)
2✔
256
    subset_W.push_back(C_idx); // Input channel
2✔
257
    for (size_t d = 0; d < spatial_dims; ++d) {
6✔
258
        symbolic::Expression ker_d = get_reduction_sym(1 + d);
4✔
259
        subset_W.push_back(ker_d);
4✔
260
    }
4✔
261

262
    // Output Y subset is simply the parallel indices computed earlier.
263
    data_flow::Subset subset_Y = out_subset;
2✔
264

265
    // Bias subset (only along output channels).
266
    data_flow::Subset subset_B;
2✔
267
    if (has_bias_) {
2✔
268
        subset_B.push_back(M_idx);
×
269
    }
×
270

271
    /************************
272
     * Add computation node *
273
     ************************/
274
    // Create tasklet performing fused-multiply-add: _out = _x * _w + _y
275
    if (has_bias_) {
2✔
276
        // Bias will be added after reduction, so no change here.
277
    }
×
278

279
    auto& tasklet = builder.add_tasklet(
4✔
280
        code_block,
2✔
281
        data_flow::TaskletCode::fma,
282
        "_out",
2✔
283
        {"_x", "_w", "_y"},
2✔
284
        builder.debug_info().get_region(block.debug_info().indices())
2✔
285
    );
286

287
    // Connect memlets.
288
    builder.add_computational_memlet(
4✔
289
        code_block,
2✔
290
        X_acc,
2✔
291
        tasklet,
2✔
292
        "_x",
2✔
293
        subset_X,
294
        iedge_X->base_type(),
2✔
295
        builder.debug_info().get_region(block.debug_info().indices())
2✔
296
    );
297
    builder.add_computational_memlet(
4✔
298
        code_block,
2✔
299
        W_acc,
2✔
300
        tasklet,
2✔
301
        "_w",
2✔
302
        subset_W,
303
        iedge_W->base_type(),
2✔
304
        builder.debug_info().get_region(block.debug_info().indices())
2✔
305
    );
306
    builder.add_computational_memlet(
4✔
307
        code_block,
2✔
308
        Y_acc_in,
2✔
309
        tasklet,
2✔
310
        "_y",
2✔
311
        subset_Y,
312
        oedge_Y->base_type(),
2✔
313
        builder.debug_info().get_region(block.debug_info().indices())
2✔
314
    );
315
    builder.add_computational_memlet(
4✔
316
        code_block,
2✔
317
        tasklet,
2✔
318
        "_out",
2✔
319
        Y_acc_out,
2✔
320
        subset_Y,
321
        oedge_Y->base_type(),
2✔
322
        builder.debug_info().get_region(block.debug_info().indices())
2✔
323
    );
324

325
    // Bias: add once per output element outside reduction loops.
326
    if (has_bias_) {
2✔
327
        // Insert after the reduction loops (i.e., right after they finish).
328
        // We add a single tasklet in the parent scope (last_map root).
329
        std::string B_name = static_cast<const data_flow::AccessNode&>(iedge_B->src()).data();
×
NEW
330
        auto& bias_block =
×
NEW
331
            builder.add_block(last_map->root(), {}, builder.debug_info().get_region(block.debug_info().indices()));
×
332
        auto& B_acc_local = builder.add_access(bias_block, B_name, dbg_B);
×
333
        auto& Y_acc2_in = builder.add_access(bias_block, Y_name, dbg_Y);
×
334
        auto& Y_acc2_out = builder.add_access(bias_block, Y_name, dbg_Y);
×
335

NEW
336
        auto& bias_tasklet = builder.add_tasklet(
×
NEW
337
            bias_block,
×
338
            data_flow::TaskletCode::add,
NEW
339
            "_out",
×
NEW
340
            {"_bias", "_y"},
×
NEW
341
            builder.debug_info().get_region(block.debug_info().indices())
×
342
        );
343
        builder.add_computational_memlet(
×
NEW
344
            bias_block,
×
NEW
345
            B_acc_local,
×
NEW
346
            bias_tasklet,
×
NEW
347
            "_bias",
×
348
            subset_B,
NEW
349
            iedge_B->base_type(),
×
NEW
350
            builder.debug_info().get_region(block.debug_info().indices())
×
351
        );
352
        builder.add_computational_memlet(
×
NEW
353
            bias_block,
×
NEW
354
            Y_acc2_in,
×
NEW
355
            bias_tasklet,
×
NEW
356
            "_y",
×
357
            subset_Y,
NEW
358
            oedge_Y->base_type(),
×
NEW
359
            builder.debug_info().get_region(block.debug_info().indices())
×
360
        );
361
        builder.add_computational_memlet(
×
NEW
362
            bias_block,
×
NEW
363
            bias_tasklet,
×
NEW
364
            "_out",
×
NEW
365
            Y_acc2_out,
×
366
            subset_Y,
NEW
367
            oedge_Y->base_type(),
×
NEW
368
            builder.debug_info().get_region(block.debug_info().indices())
×
369
        );
370
    }
×
371

372
    // Clean up block
373
    builder.remove_memlet(block, *iedge_X);
2✔
374
    builder.remove_memlet(block, *iedge_W);
2✔
375
    if (iedge_B != nullptr) {
2✔
376
        builder.remove_memlet(block, *iedge_B);
×
377
    }
×
378
    builder.remove_memlet(block, *oedge_Y);
2✔
379
    builder.remove_node(block, *this);
2✔
380
    builder.remove_child(parent, index + 1);
2✔
381

382
    return true;
383
}
2✔
384

385
std::unique_ptr<data_flow::DataFlowNode> ConvNode::
386
    clone(size_t element_id, const graph::Vertex vertex, data_flow::DataFlowGraph& parent) const {
×
387
    return std::unique_ptr<data_flow::DataFlowNode>(new ConvNode(
×
388
        element_id,
×
389
        this->debug_info(),
×
390
        vertex,
×
391
        parent,
×
392
        this->has_bias_,
×
393
        this->dilations_,
×
394
        this->kernel_shape_,
×
395
        this->pads_,
×
396
        this->strides_
×
397
    ));
398
}
×
399

400
/// Serializes a ConvNode to JSON (code, connector lists, and all
/// convolution attributes). The inverse of deserialize().
nlohmann::json ConvNodeSerializer::serialize(const data_flow::LibraryNode& library_node) {
    // Fix: the local was misleadingly named `relu_node` (copy-paste from the
    // ReLU serializer) although it refers to a ConvNode.
    const ConvNode& conv_node = static_cast<const ConvNode&>(library_node);
    nlohmann::json j;

    j["code"] = conv_node.code().value();
    j["outputs"] = conv_node.outputs();
    j["inputs"] = conv_node.inputs();
    j["has_bias"] = conv_node.has_bias();
    j["dilations"] = conv_node.dilations();
    j["kernel_shape"] = conv_node.kernel_shape();
    j["pads"] = conv_node.pads();
    j["strides"] = conv_node.strides();

    return j;
}
415

416
/// Deserializes a ConvNode from JSON and inserts it into `parent` via the
/// builder. The inverse of serialize().
///
/// @throws std::runtime_error if the "code" field does not match Conv
/// @throws nlohmann::json::exception if a required field is missing
data_flow::LibraryNode& ConvNodeSerializer::deserialize(
    const nlohmann::json& j, builder::StructuredSDFGBuilder& builder, structured_control_flow::Block& parent
) {
    // Consistently use checked access (at) for all required fields; the
    // original mixed unchecked operator[] with at().
    auto code = j.at("code").get<std::string>();
    if (code != LibraryNodeType_Conv.value()) {
        // Include the offending code in the diagnostic.
        throw std::runtime_error("Invalid library node code: " + code);
    }

    sdfg::serializer::JSONSerializer serializer;
    DebugInfoRegion debug_info = serializer.json_to_debug_info_region(j.at("debug_info"), builder.debug_info());

    // "outputs"/"inputs" are parsed only to validate their presence; the
    // ConvNode constructor derives the connector lists itself.
    auto outputs = j.at("outputs").get<std::vector<std::string>>();
    auto inputs = j.at("inputs").get<std::vector<std::string>>();
    auto has_bias = j.at("has_bias").get<bool>();
    auto dilations = j.at("dilations").get<std::vector<size_t>>();
    auto kernel_shape = j.at("kernel_shape").get<std::vector<size_t>>();
    auto pads = j.at("pads").get<std::vector<size_t>>();
    auto strides = j.at("strides").get<std::vector<size_t>>();

    return builder.add_library_node<ConvNode>(parent, debug_info, has_bias, dilations, kernel_shape, pads, strides);
}
437

438
} // namespace ml
439
} // namespace math
440
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc