• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 23490543373

24 Mar 2026 12:56PM UTC coverage: 64.456% (+0.2%) from 64.295%
23490543373

Pull #605

github

web-flow
Merge 28bc2690b into e56781552
Pull Request #605: Move einsum support

1303 of 1918 new or added lines in 14 files covered. (67.94%)

45 existing lines in 3 files now uncovered.

27952 of 43366 relevant lines covered (64.46%)

392.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.98
/opt/src/targets/cuda/cuda_data_offloading_node.cpp
1
#include "sdfg/targets/cuda/cuda_data_offloading_node.h"
2

3
#include <cstddef>
4
#include <memory>
5
#include <nlohmann/json_fwd.hpp>
6

7
#include "sdfg/analysis/loop_analysis.h"
8
#include "sdfg/codegen/code_snippet_factory.h"
9
#include "sdfg/codegen/dispatchers/block_dispatcher.h"
10
#include "sdfg/codegen/instrumentation/instrumentation_info.h"
11
#include "sdfg/codegen/language_extension.h"
12
#include "sdfg/codegen/utils.h"
13
#include "sdfg/data_flow/data_flow_graph.h"
14
#include "sdfg/data_flow/data_flow_node.h"
15
#include "sdfg/data_flow/library_node.h"
16
#include "sdfg/function.h"
17
#include "sdfg/graph/graph.h"
18
#include "sdfg/symbolic/symbolic.h"
19
#include "sdfg/targets/cuda/cuda.h"
20
#include "sdfg/targets/offloading/data_offloading_node.h"
21
#include "symengine/symengine_rcp.h"
22

23
namespace sdfg {
24
namespace cuda {
25

26
CUDADataOffloadingNode::CUDADataOffloadingNode(
27
    size_t element_id,
28
    const DebugInfo& debug_info,
29
    const graph::Vertex vertex,
30
    data_flow::DataFlowGraph& parent,
31
    symbolic::Expression size,
32
    symbolic::Expression device_id,
33
    offloading::DataTransferDirection transfer_direction,
34
    offloading::BufferLifecycle buffer_lifecycle
35
)
36
    : offloading::DataOffloadingNode(
59✔
37
          element_id,
59✔
38
          debug_info,
59✔
39
          vertex,
59✔
40
          parent,
59✔
41
          LibraryNodeType_CUDA_Offloading,
59✔
42
          {},
59✔
43
          {},
59✔
44
          transfer_direction,
59✔
45
          buffer_lifecycle,
59✔
46
          size
59✔
47
      ),
59✔
48
      device_id_(device_id) {
59✔
49
    if (!is_NONE(transfer_direction)) {
59✔
50
        this->inputs_.push_back("_src");
47✔
51
        this->outputs_.push_back("_dst");
47✔
52
    } else if (is_ALLOC(buffer_lifecycle)) {
47✔
53
        this->outputs_.push_back("_ret");
7✔
54
    } else if (is_FREE(buffer_lifecycle)) {
7✔
55
        this->inputs_.push_back("_ptr");
5✔
56
        this->outputs_.push_back("_ptr");
5✔
57
    }
5✔
58
}
59✔
59

60
void CUDADataOffloadingNode::validate(const Function& function) const {
31✔
61
    // Prevent copy-in and free
62
    if (this->is_h2d() && this->is_free()) {
31✔
63
        throw InvalidSDFGException("CUDADataOffloadingNode: Combination copy-in and free is not allowed");
×
64
    }
×
65

66
    // Prevent copy-out and alloc
67
    if (this->is_d2h() && this->is_alloc()) {
31✔
68
        throw InvalidSDFGException("CUDADataOffloadingNode: Combination copy-out and alloc is not allowed");
×
69
    }
×
70
}
31✔
71

72
// Accessor for the target device expression. May be a null expression
// (symbols() explicitly guards for that case).
const symbolic::Expression CUDADataOffloadingNode::device_id() const { return this->device_id_; }
104✔
73

74
std::unique_ptr<data_flow::DataFlowNode> CUDADataOffloadingNode::
75
    clone(size_t element_id, const graph::Vertex vertex, data_flow::DataFlowGraph& parent) const {
4✔
76
    return std::make_unique<CUDADataOffloadingNode>(
4✔
77
        element_id,
4✔
78
        this->debug_info(),
4✔
79
        vertex,
4✔
80
        parent,
4✔
81
        this->size(),
4✔
82
        this->device_id(),
4✔
83
        this->transfer_direction(),
4✔
84
        this->buffer_lifecycle()
4✔
85
    );
4✔
86
}
4✔
87

88
symbolic::SymbolSet CUDADataOffloadingNode::symbols() const {
    // Collect the base-class symbols once; the previous version called the
    // base implementation on two separate paths (early-return vs fall-through),
    // duplicating the call expression for no behavioral gain.
    auto symbols = offloading::DataOffloadingNode::symbols();
    // Only merge in the device-id atoms when a device id expression is present.
    if (!this->device_id().is_null()) {
        auto device_id_atoms = symbolic::atoms(this->device_id());
        symbols.insert(device_id_atoms.begin(), device_id_atoms.end());
    }
    return symbols;
}
32✔
97

98
void CUDADataOffloadingNode::
    replace(const symbolic::Expression old_expression, const symbolic::Expression new_expression) {
    // Substitute in the base-class expressions (e.g. size) first, then apply
    // the same substitution to the device id expression.
    offloading::DataOffloadingNode::replace(old_expression, new_expression);
    this->device_id_ = symbolic::subs(this->device_id_, old_expression, new_expression);
}
15✔
102

103
// This node always reports itself as blocking (the generated cudaMemcpy /
// cudaMalloc / cudaFree calls below are synchronous host-side calls).
bool CUDADataOffloadingNode::blocking() const { return true; }
×
104

105
bool CUDADataOffloadingNode::redundant_with(const offloading::DataOffloadingNode& other) const {
    // The base class decides on the offloading-level properties first.
    if (!offloading::DataOffloadingNode::redundant_with(other)) {
        return false;
    }

    // Base check passed, so `other` is a CUDA offloading node as well;
    // the nodes are redundant only if they also target the same device.
    const auto& other_node = static_cast<const CUDADataOffloadingNode&>(other);
    return symbolic::null_safe_eq(this->device_id(), other_node.device_id());
}
2✔
117

118
bool CUDADataOffloadingNode::equal_with(const offloading::DataOffloadingNode& other) const {
    // The base class compares the offloading-level properties first.
    if (!offloading::DataOffloadingNode::equal_with(other)) {
        return false;
    }

    // Base check passed, so `other` is a CUDA offloading node as well;
    // equality additionally requires matching device id expressions.
    const auto& other_node = static_cast<const CUDADataOffloadingNode&>(other);
    return symbolic::null_safe_eq(this->device_id(), other_node.device_id());
}
8✔
130

131
// Pure forwarding constructor: all state lives in the generic
// LibraryNodeDispatcher base.
CUDADataOffloadingNodeDispatcher::CUDADataOffloadingNodeDispatcher(
    codegen::LanguageExtension& language_extension,
    const Function& function,
    const data_flow::DataFlowGraph& data_flow_graph,
    const data_flow::LibraryNode& node
)
    : codegen::LibraryNodeDispatcher(language_extension, function, data_flow_graph, node) {}
4✔
138

139
void CUDADataOffloadingNodeDispatcher::dispatch_code(
140
    codegen::PrettyPrinter& stream,
141
    codegen::PrettyPrinter& globals_stream,
142
    codegen::CodeSnippetFactory& library_snippet_factory
143
) {
4✔
144
    auto& offloading_node = static_cast<const CUDADataOffloadingNode&>(this->node_);
4✔
145

146
    // stream << "cudaSetDevice(" << this->language_extension_.expression(offloading_node.device_id()) << ");"
147
    //        << std::endl;
148

149
    stream << "cudaError_t err;" << std::endl;
4✔
150

151
    if (offloading_node.is_alloc()) {
4✔
152
        stream << "err = cudaMalloc(&" << offloading_node.output(0) << ", "
1✔
153
               << this->language_extension_.expression(offloading_node.size()) << ");" << std::endl;
1✔
154
        cuda_error_checking(stream, this->language_extension_, "err");
1✔
155
    }
1✔
156

157
    if (offloading_node.is_h2d()) {
4✔
158
        stream << "err = cudaMemcpy(" << offloading_node.output(0) << ", " << offloading_node.input(0) << ", "
1✔
159
               << this->language_extension_.expression(offloading_node.size()) << ", cudaMemcpyHostToDevice);"
1✔
160
               << std::endl;
1✔
161
        cuda_error_checking(stream, this->language_extension_, "err");
1✔
162
    } else if (offloading_node.is_d2h()) {
3✔
163
        stream << "err = cudaMemcpy(" << offloading_node.output(0) << ", " << offloading_node.input(0) << ", "
1✔
164
               << this->language_extension_.expression(offloading_node.size()) << ", cudaMemcpyDeviceToHost);"
1✔
165
               << std::endl;
1✔
166
        cuda_error_checking(stream, this->language_extension_, "err");
1✔
167
    }
1✔
168

169
    if (offloading_node.is_free()) {
4✔
170
        stream << "err = cudaFree(" << offloading_node.input(0) << ");" << std::endl;
1✔
171
        cuda_error_checking(stream, this->language_extension_, "err");
1✔
172
    }
1✔
173
}
4✔
174

UNCOV
175
codegen::InstrumentationInfo CUDADataOffloadingNodeDispatcher::instrumentation_info() const {
    auto& cuda_node = static_cast<const CUDADataOffloadingNode&>(node_);

    // Only PCIe transfers get a dedicated instrumentation record; everything
    // else (pure alloc/free) falls back to the generic library node info.
    if (!cuda_node.is_d2h() && !cuda_node.is_h2d()) {
        return codegen::LibraryNodeDispatcher::instrumentation_info();
    }

    // The two transfer records only differ in the element type.
    const auto element_type =
        cuda_node.is_d2h() ? codegen::ElementType_D2HTransfer : codegen::ElementType_H2DTransfer;
    return codegen::InstrumentationInfo(
        node_.element_id(),
        element_type,
        TargetType_CUDA,
        analysis::LoopInfo{},
        {{"pcie_bytes", language_extension_.expression(cuda_node.size())}}
    );
}
×
197

198
nlohmann::json CUDADataOffloadingNodeSerializer::serialize(const sdfg::data_flow::LibraryNode& library_node) {
    const auto& node = static_cast<const CUDADataOffloadingNode&>(library_node);
    nlohmann::json j;

    // Library node
    j["type"] = "library_node";
    j["element_id"] = library_node.element_id();

    // Debug info
    // NOTE(review): debug info is written as flat top-level keys here, while
    // deserialize() reads a nested j["debug_info"] object — presumably an
    // outer serializer restructures this; verify the round-trip.
    auto& debug_info = library_node.debug_info();
    j["has"] = debug_info.has();
    j["filename"] = debug_info.filename();
    j["start_line"] = debug_info.start_line();
    j["start_column"] = debug_info.start_column();
    j["end_line"] = debug_info.end_line();
    j["end_column"] = debug_info.end_column();

    // Library node properties
    j["code"] = std::string(library_node.code().value());

    // Offloading node properties
    sdfg::serializer::JSONSerializer serializer;
    if (node.size().is_null()) {
        j["size"] = nlohmann::json::value_t::null;
    } else {
        j["size"] = serializer.expression(node.size());
    }
    // Fix: device_id may be a null expression (symbols() explicitly guards for
    // that), so guard it here the same way as size instead of unconditionally
    // serializing it.
    if (node.device_id().is_null()) {
        j["device_id"] = nlohmann::json::value_t::null;
    } else {
        j["device_id"] = serializer.expression(node.device_id());
    }
    j["transfer_direction"] = static_cast<int8_t>(node.transfer_direction());
    j["buffer_lifecycle"] = static_cast<int8_t>(node.buffer_lifecycle());

    return j;
}
8✔
231

232
data_flow::LibraryNode& CUDADataOffloadingNodeSerializer::deserialize(
    const nlohmann::json& j, sdfg::builder::StructuredSDFGBuilder& builder, structured_control_flow::Block& parent
) {
    auto code = j["code"].get<std::string>();
    if (code != LibraryNodeType_CUDA_Offloading.value()) {
        throw std::runtime_error("Invalid library node code");
    }

    sdfg::serializer::JSONSerializer serializer;
    DebugInfo debug_info = serializer.json_to_debug_info(j["debug_info"]);

    // size may be absent/null (alloc/free-only nodes).
    symbolic::Expression size;
    if (!j.contains("size") || j.at("size").is_null()) {
        size = SymEngine::null;
    } else {
        size = symbolic::parse(j.at("size"));
    }

    // Fix: parse device_id through symbolic::parse like size, instead of
    // constructing a SymEngine::Expression directly from the JSON value —
    // consistent with the expression format serialize() emits — and tolerate
    // a missing/null device_id for symmetry with the size handling.
    symbolic::Expression device_id;
    if (!j.contains("device_id") || j.at("device_id").is_null()) {
        device_id = SymEngine::null;
    } else {
        device_id = symbolic::parse(j.at("device_id"));
    }

    auto transfer_direction = static_cast<offloading::DataTransferDirection>(j["transfer_direction"].get<int8_t>());
    auto buffer_lifecycle = static_cast<offloading::BufferLifecycle>(j["buffer_lifecycle"].get<int8_t>());

    return builder.add_library_node<
        CUDADataOffloadingNode>(parent, debug_info, size, device_id, transfer_direction, buffer_lifecycle);
}
8✔
256

257
} // namespace cuda
258
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc