28106147644

Committed 24 Jun 2026 02:32PM UTC coverage: 61.922% (+0.1%) from 61.779%

Build # 28106147644

Build Type

Pull #806

github

Committed by

web-flow

Commit Message

Merge 2be414d54 into 57cc1db99

Pull Request Pull Request #806: Map Collapse for Multiple targets in a nested sequence

Coverage Stats

165 of 185 new or added lines in 2 files covered. (89.19%)

419 existing lines in 30 files now uncovered.

37705 of 60891 relevant lines covered (61.92%)

1004.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.9

/opt/src/targets/cuda/plugin.cpp

#include "sdfg/targets/cuda/plugin.h"


namespace sdfg::cuda {

void register_cuda_plugin(plugins::Context& context) {
    auto& libNodeDispatcherRegistry = context.library_node_dispatcher_registry;
    auto& mapDispatcherRegistry = context.map_dispatcher_registry;
    auto& libNodeSerRegistry = context.library_node_serializer_registry;

    mapDispatcherRegistry.register_map_dispatcher(
        ScheduleType_CUDA::value(),
        [](codegen::LanguageExtension& language_extension,
           StructuredSDFG& sdfg,
           analysis::AnalysisManager& analysis_manager,
           structured_control_flow::Map& node,
           codegen::InstrumentationPlan& instrumentation_plan,
           codegen::ArgCapturePlan& arg_capture_plan) {
            return std::make_unique<CUDAMapDispatcher>(
                language_extension, sdfg, analysis_manager, node, instrumentation_plan, arg_capture_plan
            );
        }
    );

    libNodeDispatcherRegistry.register_library_node_dispatcher(
        cuda::LibraryNodeType_CUDA_Offloading.value() + "::" + data_flow::ImplementationType_NONE.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<
                cuda::CUDADataOffloadingNodeDispatcher>(language_extension, function, data_flow_graph, node);
        }
    );

    libNodeSerRegistry.register_library_node_serializer(cuda::LibraryNodeType_CUDA_Offloading.value(), []() {
        return std::make_unique<cuda::CUDADataOffloadingNodeSerializer>();
    });


    // Dot - CUBLAS with data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        math::blas::LibraryNodeType_DOT.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<blas::DotNodeDispatcher_CUBLASWithTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::DotNode&>(node)
            );
        }
    );
    // Dot - CUBLAS without data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        math::blas::LibraryNodeType_DOT.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<blas::DotNodeDispatcher_CUBLASWithoutTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::DotNode&>(node)
            );
        }
    );

    // GEMM - CUBLAS with data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        math::blas::LibraryNodeType_GEMM.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<blas::GEMMNodeDispatcher_CUBLASWithTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::GEMMNode&>(node)
            );
        }
    );
    // GEMM - CUBLAS without data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        math::blas::LibraryNodeType_GEMM.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<blas::GEMMNodeDispatcher_CUBLASWithoutTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::GEMMNode&>(node)
            );
        }
    );

    // BatchedGEMM - CUBLAS with data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        math::blas::LibraryNodeType_BatchedGEMM.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<blas::BatchedGEMMNodeDispatcher_CUBLASWithTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::BatchedGEMMNode&>(node)
            );
        }
    );
    // BatchedGEMM - CUBLAS without data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        math::blas::LibraryNodeType_BatchedGEMM.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<blas::BatchedGEMMNodeDispatcher_CUBLASWithoutTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::BatchedGEMMNode&>(node)
            );
        }
    );


    // Softmax - CUDA with data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        sdfg::math::tensor::LibraryNodeType_Softmax.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<tensor::SoftmaxNodeDispatcher_CUDAWithTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::math::tensor::SoftmaxNode&>(node)
            );
        }
    );
    // Softmax - CUDA without data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        sdfg::math::tensor::LibraryNodeType_Softmax.value() +
            "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<tensor::SoftmaxNodeDispatcher_CUDAWithoutTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::math::tensor::SoftmaxNode&>(node)
            );
        }
    );


    // Memset - CUDA with data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        sdfg::stdlib::LibraryNodeType_Memset.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<cuda::stdlib::MemsetNodeDispatcher_CUDAWithTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemsetNode&>(node)
            );
        }
    );
    // Memset - CUDA without data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        sdfg::stdlib::LibraryNodeType_Memset.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<cuda::stdlib::MemsetNodeDispatcher_CUDAWithoutTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemsetNode&>(node)
            );
        }
    );


    // Memcpy - CUDA with data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        sdfg::stdlib::LibraryNodeType_Memcpy.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<cuda::stdlib::MemcpyNodeDispatcher_CUDAWithTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemcpyNode&>(node)
            );
        }
    );
    // Memcpy - CUDA without data transfers
    libNodeDispatcherRegistry.register_library_node_dispatcher(
        sdfg::stdlib::LibraryNodeType_Memcpy.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
        [](codegen::LanguageExtension& language_extension,
           const Function& function,
           const data_flow::DataFlowGraph& data_flow_graph,
           const data_flow::LibraryNode& node) {
            return std::make_unique<cuda::stdlib::MemcpyNodeDispatcher_CUDAWithoutTransfers>(
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemcpyNode&>(node)
            );
        }
    );


    context.scheduler_registry
        .register_loop_scheduler<passes::scheduler::CUDAScheduler>(passes::scheduler::CUDAScheduler::target());
}

} // namespace sdfg::cuda

1	#include "sdfg/targets/cuda/plugin.h"
2
3
4	namespace sdfg::cuda {
5
6	void register_cuda_plugin(plugins::Context& context) {	7✔
7	auto& libNodeDispatcherRegistry = context.library_node_dispatcher_registry;	7✔
8	auto& mapDispatcherRegistry = context.map_dispatcher_registry;	7✔
9	auto& libNodeSerRegistry = context.library_node_serializer_registry;	7✔
10
11	mapDispatcherRegistry.register_map_dispatcher(	7✔
12	ScheduleType_CUDA::value(),	7✔
13	[](codegen::LanguageExtension& language_extension,	7✔
14	StructuredSDFG& sdfg,	7✔
15	analysis::AnalysisManager& analysis_manager,	7✔
16	structured_control_flow::Map& node,	7✔
17	codegen::InstrumentationPlan& instrumentation_plan,	7✔
18	codegen::ArgCapturePlan& arg_capture_plan) {	7✔
19	return std::make_unique<CUDAMapDispatcher>(	4✔
20	language_extension, sdfg, analysis_manager, node, instrumentation_plan, arg_capture_plan	4✔
21	);	4✔
22	}	4✔
23	);	7✔
24
25	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
26	cuda::LibraryNodeType_CUDA_Offloading.value() + "::" + data_flow::ImplementationType_NONE.value(),	7✔
27	[](codegen::LanguageExtension& language_extension,	7✔
28	const Function& function,	7✔
29	const data_flow::DataFlowGraph& data_flow_graph,	7✔
30	const data_flow::LibraryNode& node) {	7✔
31	return std::make_unique<	2✔
32	cuda::CUDADataOffloadingNodeDispatcher>(language_extension, function, data_flow_graph, node);	2✔
33	}	2✔
34	);	7✔
35
36	libNodeSerRegistry.register_library_node_serializer(cuda::LibraryNodeType_CUDA_Offloading.value(), []() {	8✔
37	return std::make_unique<cuda::CUDADataOffloadingNodeSerializer>();	8✔
38	});	8✔
39
40
41	// Dot - CUBLAS with data transfers
42	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
43	math::blas::LibraryNodeType_DOT.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),	7✔
44	[](codegen::LanguageExtension& language_extension,	7✔
45	const Function& function,	7✔
46	const data_flow::DataFlowGraph& data_flow_graph,	7✔
47	const data_flow::LibraryNode& node) {	7✔
48	return std::make_unique<blas::DotNodeDispatcher_CUBLASWithTransfers>(	×
49	language_extension, function, data_flow_graph, dynamic_cast<const math::blas::DotNode&>(node)	×
50	);	×
51	}	×
52	);	7✔
53	// Dot - CUBLAS without data transfers
54	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
55	math::blas::LibraryNodeType_DOT.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),	7✔
56	[](codegen::LanguageExtension& language_extension,	7✔
57	const Function& function,	7✔
58	const data_flow::DataFlowGraph& data_flow_graph,	7✔
59	const data_flow::LibraryNode& node) {	7✔
60	return std::make_unique<blas::DotNodeDispatcher_CUBLASWithoutTransfers>(	×
61	language_extension, function, data_flow_graph, dynamic_cast<const math::blas::DotNode&>(node)	×
62	);	×
63	}	×
64	);	7✔
65
66	// GEMM - CUBLAS with data transfers
67	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
68	math::blas::LibraryNodeType_GEMM.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),	7✔
69	[](codegen::LanguageExtension& language_extension,	7✔
70	const Function& function,	7✔
71	const data_flow::DataFlowGraph& data_flow_graph,	7✔
72	const data_flow::LibraryNode& node) {	7✔
73	return std::make_unique<blas::GEMMNodeDispatcher_CUBLASWithTransfers>(	×
74	language_extension, function, data_flow_graph, dynamic_cast<const math::blas::GEMMNode&>(node)	×
75	);	×
76	}	×
77	);	7✔
78	// GEMM - CUBLAS without data transfers
79	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
80	math::blas::LibraryNodeType_GEMM.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),	7✔
81	[](codegen::LanguageExtension& language_extension,	7✔
82	const Function& function,	7✔
83	const data_flow::DataFlowGraph& data_flow_graph,	7✔
84	const data_flow::LibraryNode& node) {	7✔
85	return std::make_unique<blas::GEMMNodeDispatcher_CUBLASWithoutTransfers>(	×
86	language_extension, function, data_flow_graph, dynamic_cast<const math::blas::GEMMNode&>(node)	×
87	);	×
88	}	×
89	);	7✔
90
91	// BatchedGEMM - CUBLAS with data transfers
92	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
93	math::blas::LibraryNodeType_BatchedGEMM.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),	7✔
94	[](codegen::LanguageExtension& language_extension,	7✔
95	const Function& function,	7✔
96	const data_flow::DataFlowGraph& data_flow_graph,	7✔
97	const data_flow::LibraryNode& node) {	7✔
98	return std::make_unique<blas::BatchedGEMMNodeDispatcher_CUBLASWithTransfers>(	×
99	language_extension, function, data_flow_graph, dynamic_cast<const math::blas::BatchedGEMMNode&>(node)	×
100	);	×
101	}	×
102	);	7✔
103	// BatchedGEMM - CUBLAS without data transfers
104	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
105	math::blas::LibraryNodeType_BatchedGEMM.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),	7✔
106	[](codegen::LanguageExtension& language_extension,	7✔
107	const Function& function,	7✔
108	const data_flow::DataFlowGraph& data_flow_graph,	7✔
109	const data_flow::LibraryNode& node) {	7✔
110	return std::make_unique<blas::BatchedGEMMNodeDispatcher_CUBLASWithoutTransfers>(	×
111	language_extension, function, data_flow_graph, dynamic_cast<const math::blas::BatchedGEMMNode&>(node)	×
112	);	×
113	}	×
114	);	7✔
115
116
117	// Softmax - CUDA with data transfers
118	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
119	sdfg::math::tensor::LibraryNodeType_Softmax.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),	7✔
120	[](codegen::LanguageExtension& language_extension,	7✔
121	const Function& function,	7✔
122	const data_flow::DataFlowGraph& data_flow_graph,	7✔
123	const data_flow::LibraryNode& node) {	7✔
124	return std::make_unique<tensor::SoftmaxNodeDispatcher_CUDAWithTransfers>(	2✔
125	language_extension, function, data_flow_graph, dynamic_cast<const sdfg::math::tensor::SoftmaxNode&>(node)	2✔
126	);	2✔
127	}	2✔
128	);	7✔
129	// Softmax - CUDA without data transfers
130	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
131	sdfg::math::tensor::LibraryNodeType_Softmax.value() +	7✔
132	"::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),	7✔
133	[](codegen::LanguageExtension& language_extension,	7✔
134	const Function& function,	7✔
135	const data_flow::DataFlowGraph& data_flow_graph,	7✔
136	const data_flow::LibraryNode& node) {	7✔
137	return std::make_unique<tensor::SoftmaxNodeDispatcher_CUDAWithoutTransfers>(	3✔
138	language_extension, function, data_flow_graph, dynamic_cast<const sdfg::math::tensor::SoftmaxNode&>(node)	3✔
139	);	3✔
140	}	3✔
141	);	7✔
142
143
144	// Memset - CUDA with data transfers
145	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
146	sdfg::stdlib::LibraryNodeType_Memset.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),	7✔
147	[](codegen::LanguageExtension& language_extension,	7✔
148	const Function& function,	7✔
149	const data_flow::DataFlowGraph& data_flow_graph,	7✔
150	const data_flow::LibraryNode& node) {	7✔
151	return std::make_unique<cuda::stdlib::MemsetNodeDispatcher_CUDAWithTransfers>(	×
152	language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemsetNode&>(node)	×
153	);	×
UNCOV 154	}	×
155	);	7✔
156	// Memset - CUDA without data transfers
157	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
158	sdfg::stdlib::LibraryNodeType_Memset.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),	7✔
159	[](codegen::LanguageExtension& language_extension,	7✔
160	const Function& function,	7✔
161	const data_flow::DataFlowGraph& data_flow_graph,	7✔
162	const data_flow::LibraryNode& node) {	7✔
163	return std::make_unique<cuda::stdlib::MemsetNodeDispatcher_CUDAWithoutTransfers>(	×
164	language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemsetNode&>(node)	×
165	);	×
UNCOV 166	}	×
167	);	7✔
168
169
170	// Memcpy - CUDA with data transfers
171	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
172	sdfg::stdlib::LibraryNodeType_Memcpy.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),	7✔
173	[](codegen::LanguageExtension& language_extension,	7✔
174	const Function& function,	7✔
175	const data_flow::DataFlowGraph& data_flow_graph,	7✔
176	const data_flow::LibraryNode& node) {	7✔
UNCOV 177	return std::make_unique<cuda::stdlib::MemcpyNodeDispatcher_CUDAWithTransfers>(	×
UNCOV 178	language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemcpyNode&>(node)	×
UNCOV 179	);	×
UNCOV 180	}	×
181	);	7✔
182	// Memcpy - CUDA without data transfers
183	libNodeDispatcherRegistry.register_library_node_dispatcher(	7✔
184	sdfg::stdlib::LibraryNodeType_Memcpy.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),	7✔
185	[](codegen::LanguageExtension& language_extension,	7✔
186	const Function& function,	7✔
187	const data_flow::DataFlowGraph& data_flow_graph,	7✔
188	const data_flow::LibraryNode& node) {	7✔
UNCOV 189	return std::make_unique<cuda::stdlib::MemcpyNodeDispatcher_CUDAWithoutTransfers>(	×
UNCOV 190	language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemcpyNode&>(node)	×
UNCOV 191	);	×
UNCOV 192	}	×
193	);	7✔
194
195
196	context.scheduler_registry	7✔
197	.register_loop_scheduler<passes::scheduler::CUDAScheduler>(passes::scheduler::CUDAScheduler::target());	7✔
198	}	7✔
199
200	} // namespace sdfg::cuda

daisytuner / docc / 28106147644

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous