23848280014

Committed 01 Apr 2026 12:21PM UTC coverage: 64.42% (-0.07%) from 64.49%

Build # 23848280014

Build Type

Pull #624

github

Committed by

web-flow

Commit Message

Merge 2e8bcfaf8 into 53580ad0f

Pull Request Pull Request #624: Skip redundant invalidates on perfect loop distribution

Coverage Stats

43 of 45 new or added lines in 2 files covered. (95.56%)

286 existing lines in 8 files now uncovered.

28812 of 44725 relevant lines covered (64.42%)

435.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.61

/opt/src/targets/cuda/cuda_data_offloading_node.cpp

#include "sdfg/targets/cuda/cuda_data_offloading_node.h"

#include <cstddef>
#include <memory>
#include <nlohmann/json_fwd.hpp>

#include "sdfg/analysis/loop_analysis.h"
#include "sdfg/codegen/code_snippet_factory.h"
#include "sdfg/codegen/dispatchers/block_dispatcher.h"
#include "sdfg/codegen/instrumentation/instrumentation_info.h"
#include "sdfg/codegen/language_extension.h"
#include "sdfg/codegen/utils.h"
#include "sdfg/data_flow/data_flow_graph.h"
#include "sdfg/data_flow/data_flow_node.h"
#include "sdfg/data_flow/library_node.h"
#include "sdfg/function.h"
#include "sdfg/graph/graph.h"
#include "sdfg/symbolic/symbolic.h"
#include "sdfg/targets/cuda/cuda.h"
#include "sdfg/targets/offloading/data_offloading_node.h"
#include "symengine/symengine_rcp.h"

namespace sdfg {
namespace cuda {

CUDADataOffloadingNode::CUDADataOffloadingNode(
    size_t element_id,
    const DebugInfo& debug_info,
    const graph::Vertex vertex,
    data_flow::DataFlowGraph& parent,
    symbolic::Expression size,
    symbolic::Expression device_id,
    offloading::DataTransferDirection transfer_direction,
    offloading::BufferLifecycle buffer_lifecycle
)
    : offloading::DataOffloadingNode(
          element_id,
          debug_info,
          vertex,
          parent,
          LibraryNodeType_CUDA_Offloading,
          {},
          {},
          transfer_direction,
          buffer_lifecycle,
          size
      ),
      device_id_(device_id) {
    if (!is_NONE(transfer_direction)) {
        this->inputs_.push_back("_src");
        this->outputs_.push_back("_dst");
    } else if (is_ALLOC(buffer_lifecycle)) {
        this->outputs_.push_back("_ret");
    } else if (is_FREE(buffer_lifecycle)) {
        this->inputs_.push_back("_ptr");
        this->outputs_.push_back("_ptr");
    }
}

void CUDADataOffloadingNode::validate(const Function& function) const {
    // Prevent copy-in and free
    if (this->is_h2d() && this->is_free()) {
        throw InvalidSDFGException("CUDADataOffloadingNode: Combination copy-in and free is not allowed");
    }

    // Prevent copy-out and alloc
    if (this->is_d2h() && this->is_alloc()) {
        throw InvalidSDFGException("CUDADataOffloadingNode: Combination copy-out and alloc is not allowed");
    }
}

const symbolic::Expression CUDADataOffloadingNode::device_id() const { return this->device_id_; }

std::unique_ptr<data_flow::DataFlowNode> CUDADataOffloadingNode::
    clone(size_t element_id, const graph::Vertex vertex, data_flow::DataFlowGraph& parent) const {
    return std::make_unique<CUDADataOffloadingNode>(
        element_id,
        this->debug_info(),
        vertex,
        parent,
        this->size(),
        this->device_id(),
        this->transfer_direction(),
        this->buffer_lifecycle()
    );
}

symbolic::SymbolSet CUDADataOffloadingNode::symbols() const {
    if (this->device_id().is_null()) {
        return offloading::DataOffloadingNode::symbols();
    }
    auto symbols = offloading::DataOffloadingNode::symbols();
    auto device_id_atoms = symbolic::atoms(this->device_id());
    symbols.insert(device_id_atoms.begin(), device_id_atoms.end());
    return symbols;
}

void CUDADataOffloadingNode::replace(const symbolic::Expression old_expression, const symbolic::Expression new_expression) {
    offloading::DataOffloadingNode::replace(old_expression, new_expression);
    this->device_id_ = symbolic::subs(this->device_id_, old_expression, new_expression);
}

bool CUDADataOffloadingNode::blocking() const { return true; }

bool CUDADataOffloadingNode::redundant_with(const offloading::DataOffloadingNode& other) const {
    if (!offloading::DataOffloadingNode::redundant_with(other)) {
        return false;
    }

    auto& other_node = static_cast<const CUDADataOffloadingNode&>(other);
    if (!symbolic::null_safe_eq(this->device_id(), other_node.device_id())) {
        return false;
    }

    return true;
}

bool CUDADataOffloadingNode::equal_with(const offloading::DataOffloadingNode& other) const {
    if (!offloading::DataOffloadingNode::equal_with(other)) {
        return false;
    }

    auto& other_node = static_cast<const CUDADataOffloadingNode&>(other);
    if (!symbolic::null_safe_eq(this->device_id(), other_node.device_id())) {
        return false;
    }

    return true;
}

bool CUDADataOffloadingNode::is_same_target(const offloading::DataOffloadingNode& other) const {
    return dynamic_cast<const CUDADataOffloadingNode*>(&other) != nullptr;
    // TODO check for dev ID
}

CUDADataOffloadingNodeDispatcher::CUDADataOffloadingNodeDispatcher(
    codegen::LanguageExtension& language_extension,
    const Function& function,
    const data_flow::DataFlowGraph& data_flow_graph,
    const data_flow::LibraryNode& node
)
    : codegen::LibraryNodeDispatcher(language_extension, function, data_flow_graph, node) {}

void CUDADataOffloadingNodeDispatcher::dispatch_code(
    codegen::PrettyPrinter& stream,
    codegen::PrettyPrinter& globals_stream,
    codegen::CodeSnippetFactory& library_snippet_factory
) {
    auto& offloading_node = static_cast<const CUDADataOffloadingNode&>(this->node_);

    // stream << "cudaSetDevice(" << this->language_extension_.expression(offloading_node.device_id()) << ");"
    //        << std::endl;

    stream << "cudaError_t err;" << std::endl;

    if (offloading_node.is_alloc()) {
        stream << "err = cudaMalloc(&" << offloading_node.output(0) << ", "
               << this->language_extension_.expression(offloading_node.size()) << ");" << std::endl;
        cuda_error_checking(stream, this->language_extension_, "err");
    }

    if (offloading_node.is_h2d()) {
        stream << "err = cudaMemcpy(" << offloading_node.output(0) << ", " << offloading_node.input(0) << ", "
               << this->language_extension_.expression(offloading_node.size()) << ", cudaMemcpyHostToDevice);"
               << std::endl;
        cuda_error_checking(stream, this->language_extension_, "err");
    } else if (offloading_node.is_d2h()) {
        stream << "err = cudaMemcpy(" << offloading_node.output(0) << ", " << offloading_node.input(0) << ", "
               << this->language_extension_.expression(offloading_node.size()) << ", cudaMemcpyDeviceToHost);"
               << std::endl;
        cuda_error_checking(stream, this->language_extension_, "err");
    }

    if (offloading_node.is_free()) {
        stream << "err = cudaFree(" << offloading_node.input(0) << ");" << std::endl;
        cuda_error_checking(stream, this->language_extension_, "err");
    }
}

codegen::InstrumentationInfo CUDADataOffloadingNodeDispatcher::instrumentation_info() const {
    auto& cuda_node = static_cast<const CUDADataOffloadingNode&>(node_);
    if (cuda_node.is_d2h()) {
        return codegen::InstrumentationInfo(
            node_.element_id(),
            codegen::ElementType_D2HTransfer,
            TargetType_CUDA,
            analysis::LoopInfo{},
            {{"pcie_bytes", language_extension_.expression(cuda_node.size())}}
        );
    } else if (cuda_node.is_h2d()) {
        return codegen::InstrumentationInfo(
            node_.element_id(),
            codegen::ElementType_H2DTransfer,
            TargetType_CUDA,
            analysis::LoopInfo{},
            {{"pcie_bytes", language_extension_.expression(cuda_node.size())}}
        );
    } else {
        return codegen::LibraryNodeDispatcher::instrumentation_info();
    }
}

nlohmann::json CUDADataOffloadingNodeSerializer::serialize(const sdfg::data_flow::LibraryNode& library_node) {
    const auto& node = static_cast<const CUDADataOffloadingNode&>(library_node);
    nlohmann::json j;

    // Library node
    j["type"] = "library_node";
    j["element_id"] = library_node.element_id();

    // Debug info
    auto& debug_info = library_node.debug_info();
    j["has"] = debug_info.has();
    j["filename"] = debug_info.filename();
    j["start_line"] = debug_info.start_line();
    j["start_column"] = debug_info.start_column();
    j["end_line"] = debug_info.end_line();
    j["end_column"] = debug_info.end_column();

    // Library node properties
    j["code"] = std::string(library_node.code().value());

    // Offloading node properties
    sdfg::serializer::JSONSerializer serializer;
    if (node.size().is_null()) {
        j["size"] = nlohmann::json::value_t::null;
    } else {
        j["size"] = serializer.expression(node.size());
    }
    j["device_id"] = serializer.expression(node.device_id());
    j["transfer_direction"] = static_cast<int8_t>(node.transfer_direction());
    j["buffer_lifecycle"] = static_cast<int8_t>(node.buffer_lifecycle());

    return j;
}

data_flow::LibraryNode& CUDADataOffloadingNodeSerializer::deserialize(
    const nlohmann::json& j, sdfg::builder::StructuredSDFGBuilder& builder, structured_control_flow::Block& parent
) {
    auto code = j["code"].get<std::string>();
    if (code != LibraryNodeType_CUDA_Offloading.value()) {
        throw std::runtime_error("Invalid library node code");
    }

    sdfg::serializer::JSONSerializer serializer;
    DebugInfo debug_info = serializer.json_to_debug_info(j["debug_info"]);

    symbolic::Expression size;
    if (!j.contains("size") || j.at("size").is_null()) {
        size = SymEngine::null;
    } else {
        size = symbolic::parse(j.at("size"));
    }
    SymEngine::Expression device_id(j.at("device_id"));
    auto transfer_direction = static_cast<offloading::DataTransferDirection>(j["transfer_direction"].get<int8_t>());
    auto buffer_lifecycle = static_cast<offloading::BufferLifecycle>(j["buffer_lifecycle"].get<int8_t>());

    return builder.add_library_node<
        CUDADataOffloadingNode>(parent, debug_info, size, device_id, transfer_direction, buffer_lifecycle);
}

} // namespace cuda
} // namespace sdfg

1	#include "sdfg/targets/cuda/cuda_data_offloading_node.h"
2
3	#include <cstddef>
4	#include <memory>
5	#include <nlohmann/json_fwd.hpp>
6
7	#include "sdfg/analysis/loop_analysis.h"
8	#include "sdfg/codegen/code_snippet_factory.h"
9	#include "sdfg/codegen/dispatchers/block_dispatcher.h"
10	#include "sdfg/codegen/instrumentation/instrumentation_info.h"
11	#include "sdfg/codegen/language_extension.h"
12	#include "sdfg/codegen/utils.h"
13	#include "sdfg/data_flow/data_flow_graph.h"
14	#include "sdfg/data_flow/data_flow_node.h"
15	#include "sdfg/data_flow/library_node.h"
16	#include "sdfg/function.h"
17	#include "sdfg/graph/graph.h"
18	#include "sdfg/symbolic/symbolic.h"
19	#include "sdfg/targets/cuda/cuda.h"
20	#include "sdfg/targets/offloading/data_offloading_node.h"
21	#include "symengine/symengine_rcp.h"
22
23	namespace sdfg {
24	namespace cuda {
25
26	CUDADataOffloadingNode::CUDADataOffloadingNode(
27	size_t element_id,
28	const DebugInfo& debug_info,
29	const graph::Vertex vertex,
30	data_flow::DataFlowGraph& parent,
31	symbolic::Expression size,
32	symbolic::Expression device_id,
33	offloading::DataTransferDirection transfer_direction,
34	offloading::BufferLifecycle buffer_lifecycle
35	)
36	: offloading::DataOffloadingNode(	59✔
37	element_id,	59✔
38	debug_info,	59✔
39	vertex,	59✔
40	parent,	59✔
41	LibraryNodeType_CUDA_Offloading,	59✔
42	{},	59✔
43	{},	59✔
44	transfer_direction,	59✔
45	buffer_lifecycle,	59✔
46	size	59✔
47	),	59✔
48	device_id_(device_id) {	59✔
49	if (!is_NONE(transfer_direction)) {	59✔
50	this->inputs_.push_back("_src");	47✔
51	this->outputs_.push_back("_dst");	47✔
52	} else if (is_ALLOC(buffer_lifecycle)) {	47✔
53	this->outputs_.push_back("_ret");	7✔
54	} else if (is_FREE(buffer_lifecycle)) {	7✔
55	this->inputs_.push_back("_ptr");	5✔
56	this->outputs_.push_back("_ptr");	5✔
57	}	5✔
58	}	59✔
59
60	void CUDADataOffloadingNode::validate(const Function& function) const {	37✔
61	// Prevent copy-in and free
62	if (this->is_h2d() && this->is_free()) {	37✔
63	throw InvalidSDFGException("CUDADataOffloadingNode: Combination copy-in and free is not allowed");	×
64	}	×
65
66	// Prevent copy-out and alloc
67	if (this->is_d2h() && this->is_alloc()) {	37✔
68	throw InvalidSDFGException("CUDADataOffloadingNode: Combination copy-out and alloc is not allowed");	×
69	}	×
70	}	37✔
71
72	const symbolic::Expression CUDADataOffloadingNode::device_id() const { return this->device_id_; }	116✔
73
74	std::unique_ptr<data_flow::DataFlowNode> CUDADataOffloadingNode::
75	clone(size_t element_id, const graph::Vertex vertex, data_flow::DataFlowGraph& parent) const {	4✔
76	return std::make_unique<CUDADataOffloadingNode>(	4✔
77	element_id,	4✔
78	this->debug_info(),	4✔
79	vertex,	4✔
80	parent,	4✔
81	this->size(),	4✔
82	this->device_id(),	4✔
83	this->transfer_direction(),	4✔
84	this->buffer_lifecycle()	4✔
85	);	4✔
86	}	4✔
87
88	symbolic::SymbolSet CUDADataOffloadingNode::symbols() const {	38✔
89	if (this->device_id().is_null()) {	38✔
90	return offloading::DataOffloadingNode::symbols();	×
91	}	×
92	auto symbols = offloading::DataOffloadingNode::symbols();	38✔
93	auto device_id_atoms = symbolic::atoms(this->device_id());	38✔
94	symbols.insert(device_id_atoms.begin(), device_id_atoms.end());	38✔
95	return symbols;	38✔
96	}	38✔
97
98	void CUDADataOffloadingNode::replace(const symbolic::Expression old_expression, const symbolic::Expression new_expression) {	15✔
99	offloading::DataOffloadingNode::replace(old_expression, new_expression);	15✔
100	this->device_id_ = symbolic::subs(this->device_id_, old_expression, new_expression);	15✔
101	}	15✔
102
103	bool CUDADataOffloadingNode::blocking() const { return true; }	×
104
105	bool CUDADataOffloadingNode::redundant_with(const offloading::DataOffloadingNode& other) const {	3✔
106	if (!offloading::DataOffloadingNode::redundant_with(other)) {	3✔
107	return false;	1✔
108	}	1✔
109
110	auto& other_node = static_cast<const CUDADataOffloadingNode&>(other);	2✔
111	if (!symbolic::null_safe_eq(this->device_id(), other_node.device_id())) {	2✔
112	return false;	×
113	}	×
114
115	return true;	2✔
116	}	2✔
117
118	bool CUDADataOffloadingNode::equal_with(const offloading::DataOffloadingNode& other) const {	8✔
119	if (!offloading::DataOffloadingNode::equal_with(other)) {	8✔
120	return false;	×
121	}	×
122
123	auto& other_node = static_cast<const CUDADataOffloadingNode&>(other);	8✔
124	if (!symbolic::null_safe_eq(this->device_id(), other_node.device_id())) {	8✔
125	return false;	×
126	}	×
127
128	return true;	8✔
129	}	8✔
130
UNCOV 131	bool CUDADataOffloadingNode::is_same_target(const offloading::DataOffloadingNode& other) const {	×
UNCOV 132	return dynamic_cast<const CUDADataOffloadingNode*>(&other) != nullptr;	×
133	// TODO check for dev ID
UNCOV 134	}	×
135
136	CUDADataOffloadingNodeDispatcher::CUDADataOffloadingNodeDispatcher(
137	codegen::LanguageExtension& language_extension,
138	const Function& function,
139	const data_flow::DataFlowGraph& data_flow_graph,
140	const data_flow::LibraryNode& node
141	)
142	: codegen::LibraryNodeDispatcher(language_extension, function, data_flow_graph, node) {}	4✔
143
144	void CUDADataOffloadingNodeDispatcher::dispatch_code(
145	codegen::PrettyPrinter& stream,
146	codegen::PrettyPrinter& globals_stream,
147	codegen::CodeSnippetFactory& library_snippet_factory
148	) {	4✔
149	auto& offloading_node = static_cast<const CUDADataOffloadingNode&>(this->node_);	4✔
150
151	// stream << "cudaSetDevice(" << this->language_extension_.expression(offloading_node.device_id()) << ");"
152	// << std::endl;
153
154	stream << "cudaError_t err;" << std::endl;	4✔
155
156	if (offloading_node.is_alloc()) {	4✔
157	stream << "err = cudaMalloc(&" << offloading_node.output(0) << ", "	1✔
158	<< this->language_extension_.expression(offloading_node.size()) << ");" << std::endl;	1✔
159	cuda_error_checking(stream, this->language_extension_, "err");	1✔
160	}	1✔
161
162	if (offloading_node.is_h2d()) {	4✔
163	stream << "err = cudaMemcpy(" << offloading_node.output(0) << ", " << offloading_node.input(0) << ", "	1✔
164	<< this->language_extension_.expression(offloading_node.size()) << ", cudaMemcpyHostToDevice);"	1✔
165	<< std::endl;	1✔
166	cuda_error_checking(stream, this->language_extension_, "err");	1✔
167	} else if (offloading_node.is_d2h()) {	3✔
168	stream << "err = cudaMemcpy(" << offloading_node.output(0) << ", " << offloading_node.input(0) << ", "	1✔
169	<< this->language_extension_.expression(offloading_node.size()) << ", cudaMemcpyDeviceToHost);"	1✔
170	<< std::endl;	1✔
171	cuda_error_checking(stream, this->language_extension_, "err");	1✔
172	}	1✔
173
174	if (offloading_node.is_free()) {	4✔
175	stream << "err = cudaFree(" << offloading_node.input(0) << ");" << std::endl;	1✔
176	cuda_error_checking(stream, this->language_extension_, "err");	1✔
177	}	1✔
178	}	4✔
179
180	codegen::InstrumentationInfo CUDADataOffloadingNodeDispatcher::instrumentation_info() const {	×
181	auto& cuda_node = static_cast<const CUDADataOffloadingNode&>(node_);	×
182	if (cuda_node.is_d2h()) {	×
183	return codegen::InstrumentationInfo(	×
184	node_.element_id(),	×
185	codegen::ElementType_D2HTransfer,	×
186	TargetType_CUDA,	×
187	analysis::LoopInfo{},	×
188	{{"pcie_bytes", language_extension_.expression(cuda_node.size())}}	×
189	);	×
190	} else if (cuda_node.is_h2d()) {	×
191	return codegen::InstrumentationInfo(	×
192	node_.element_id(),	×
193	codegen::ElementType_H2DTransfer,	×
194	TargetType_CUDA,	×
195	analysis::LoopInfo{},	×
196	{{"pcie_bytes", language_extension_.expression(cuda_node.size())}}	×
UNCOV 197	);	×
UNCOV 198	} else {	×
UNCOV 199	return codegen::LibraryNodeDispatcher::instrumentation_info();	×
UNCOV 200	}	×
UNCOV 201	}	×
202
203	nlohmann::json CUDADataOffloadingNodeSerializer::serialize(const sdfg::data_flow::LibraryNode& library_node) {	8✔
204	const auto& node = static_cast<const CUDADataOffloadingNode&>(library_node);	8✔
205	nlohmann::json j;	8✔
206
207	// Library node
208	j["type"] = "library_node";	8✔
209	j["element_id"] = library_node.element_id();	8✔
210
211	// Debug info
212	auto& debug_info = library_node.debug_info();	8✔
213	j["has"] = debug_info.has();	8✔
214	j["filename"] = debug_info.filename();	8✔
215	j["start_line"] = debug_info.start_line();	8✔
216	j["start_column"] = debug_info.start_column();	8✔
217	j["end_line"] = debug_info.end_line();	8✔
218	j["end_column"] = debug_info.end_column();	8✔
219
220	// Library node properties
221	j["code"] = std::string(library_node.code().value());	8✔
222
223	// Offloading node properties
224	sdfg::serializer::JSONSerializer serializer;	8✔
225	if (node.size().is_null()) {	8✔
226	j["size"] = nlohmann::json::value_t::null;	1✔
227	} else {	7✔
228	j["size"] = serializer.expression(node.size());	7✔
229	}	7✔
230	j["device_id"] = serializer.expression(node.device_id());	8✔
231	j["transfer_direction"] = static_cast<int8_t>(node.transfer_direction());	8✔
232	j["buffer_lifecycle"] = static_cast<int8_t>(node.buffer_lifecycle());	8✔
233
234	return j;	8✔
235	}	8✔
236
237	data_flow::LibraryNode& CUDADataOffloadingNodeSerializer::deserialize(
238	const nlohmann::json& j, sdfg::builder::StructuredSDFGBuilder& builder, structured_control_flow::Block& parent
239	) {	8✔
240	auto code = j["code"].get<std::string>();	8✔
241	if (code != LibraryNodeType_CUDA_Offloading.value()) {	8✔
UNCOV 242	throw std::runtime_error("Invalid library node code");	×
UNCOV 243	}	×
244
245	sdfg::serializer::JSONSerializer serializer;	8✔
246	DebugInfo debug_info = serializer.json_to_debug_info(j["debug_info"]);	8✔
247
248	symbolic::Expression size;	8✔
249	if (!j.contains("size") \|\| j.at("size").is_null()) {	8✔
250	size = SymEngine::null;	1✔
251	} else {	7✔
252	size = symbolic::parse(j.at("size"));	7✔
253	}	7✔
254	SymEngine::Expression device_id(j.at("device_id"));	8✔
255	auto transfer_direction = static_cast<offloading::DataTransferDirection>(j["transfer_direction"].get<int8_t>());	8✔
256	auto buffer_lifecycle = static_cast<offloading::BufferLifecycle>(j["buffer_lifecycle"].get<int8_t>());	8✔
257
258	return builder.add_library_node<	8✔
259	CUDADataOffloadingNode>(parent, debug_info, size, device_id, transfer_direction, buffer_lifecycle);	8✔
260	}	8✔
261
262	} // namespace cuda
263	} // namespace sdfg

daisytuner / docc / 23848280014

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous