• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28302975093

27 Jun 2026 09:55PM UTC coverage: 61.929% (+0.2%) from 61.754%
28302975093

Pull #814

github

web-flow
Merge d9618e50c into 8322f5994
Pull Request #814: Adds GPU reduce dispatchers

567 of 854 new or added lines in 16 files covered. (66.39%)

17 existing lines in 1 file now uncovered.

39450 of 63702 relevant lines covered (61.93%)

968.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

75.56
/opt/src/targets/cuda/plugin.cpp
1
#include "sdfg/targets/cuda/plugin.h"
2

3
#include "sdfg/targets/cuda/cuda_reduce_dispatcher.h"
4

5

6
namespace sdfg::cuda {
7

8
void register_cuda_plugin(plugins::Context& context) {
7✔
9
    auto& libNodeDispatcherRegistry = context.library_node_dispatcher_registry;
7✔
10
    auto& mapDispatcherRegistry = context.map_dispatcher_registry;
7✔
11
    auto& reduceDispatcherRegistry = context.reduce_dispatcher_registry;
7✔
12
    auto& libNodeSerRegistry = context.library_node_serializer_registry;
7✔
13

14
    mapDispatcherRegistry.register_map_dispatcher(
7✔
15
        ScheduleType_CUDA::value(),
7✔
16
        [](codegen::LanguageExtension& language_extension,
7✔
17
           StructuredSDFG& sdfg,
7✔
18
           analysis::AnalysisManager& analysis_manager,
7✔
19
           structured_control_flow::Map& node,
7✔
20
           codegen::InstrumentationPlan& instrumentation_plan,
7✔
21
           codegen::ArgCapturePlan& arg_capture_plan) {
7✔
22
            return std::make_unique<CUDAMapDispatcher>(
4✔
23
                language_extension, sdfg, analysis_manager, node, instrumentation_plan, arg_capture_plan
4✔
24
            );
4✔
25
        }
4✔
26
    );
7✔
27

28
    reduceDispatcherRegistry.register_reduce_dispatcher(
7✔
29
        ScheduleType_CUDA::value(),
7✔
30
        [](codegen::LanguageExtension& language_extension,
7✔
31
           StructuredSDFG& sdfg,
7✔
32
           analysis::AnalysisManager& analysis_manager,
7✔
33
           structured_control_flow::Reduce& node,
7✔
34
           codegen::InstrumentationPlan& instrumentation_plan,
7✔
35
           codegen::ArgCapturePlan& arg_capture_plan) {
7✔
NEW
36
            return std::make_unique<CUDAReduceDispatcher>(
×
NEW
37
                language_extension, sdfg, analysis_manager, node, instrumentation_plan, arg_capture_plan
×
NEW
38
            );
×
NEW
39
        }
×
40
    );
7✔
41

42
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
43
        cuda::LibraryNodeType_CUDA_Offloading.value() + "::" + data_flow::ImplementationType_NONE.value(),
7✔
44
        [](codegen::LanguageExtension& language_extension,
7✔
45
           const Function& function,
7✔
46
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
47
           const data_flow::LibraryNode& node) {
7✔
48
            return std::make_unique<
2✔
49
                cuda::CUDADataOffloadingNodeDispatcher>(language_extension, function, data_flow_graph, node);
2✔
50
        }
2✔
51
    );
7✔
52

53
    libNodeSerRegistry.register_library_node_serializer(cuda::LibraryNodeType_CUDA_Offloading.value(), []() {
8✔
54
        return std::make_unique<cuda::CUDADataOffloadingNodeSerializer>();
8✔
55
    });
8✔
56

57

58
    // Dot - CUBLAS with data transfers
59
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
60
        math::blas::LibraryNodeType_DOT.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
7✔
61
        [](codegen::LanguageExtension& language_extension,
7✔
62
           const Function& function,
7✔
63
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
64
           const data_flow::LibraryNode& node) {
7✔
65
            return std::make_unique<blas::DotNodeDispatcher_CUBLASWithTransfers>(
×
66
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::DotNode&>(node)
×
67
            );
×
68
        }
×
69
    );
7✔
70
    // Dot - CUBLAS without data transfers
71
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
72
        math::blas::LibraryNodeType_DOT.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
7✔
73
        [](codegen::LanguageExtension& language_extension,
7✔
74
           const Function& function,
7✔
75
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
76
           const data_flow::LibraryNode& node) {
7✔
77
            return std::make_unique<blas::DotNodeDispatcher_CUBLASWithoutTransfers>(
×
78
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::DotNode&>(node)
×
79
            );
×
80
        }
×
81
    );
7✔
82

83
    // GEMM - CUBLAS with data transfers
84
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
85
        math::blas::LibraryNodeType_GEMM.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
7✔
86
        [](codegen::LanguageExtension& language_extension,
7✔
87
           const Function& function,
7✔
88
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
89
           const data_flow::LibraryNode& node) {
7✔
90
            return std::make_unique<blas::GEMMNodeDispatcher_CUBLASWithTransfers>(
×
91
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::GEMMNode&>(node)
×
92
            );
×
93
        }
×
94
    );
7✔
95
    // GEMM - CUBLAS without data transfers
96
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
97
        math::blas::LibraryNodeType_GEMM.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
7✔
98
        [](codegen::LanguageExtension& language_extension,
7✔
99
           const Function& function,
7✔
100
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
101
           const data_flow::LibraryNode& node) {
7✔
102
            return std::make_unique<blas::GEMMNodeDispatcher_CUBLASWithoutTransfers>(
×
103
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::GEMMNode&>(node)
×
104
            );
×
105
        }
×
106
    );
7✔
107

108
    // BatchedGEMM - CUBLAS with data transfers
109
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
110
        math::blas::LibraryNodeType_BatchedGEMM.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
7✔
111
        [](codegen::LanguageExtension& language_extension,
7✔
112
           const Function& function,
7✔
113
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
114
           const data_flow::LibraryNode& node) {
7✔
115
            return std::make_unique<blas::BatchedGEMMNodeDispatcher_CUBLASWithTransfers>(
×
116
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::BatchedGEMMNode&>(node)
×
117
            );
×
118
        }
×
119
    );
7✔
120
    // BatchedGEMM - CUBLAS without data transfers
121
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
122
        math::blas::LibraryNodeType_BatchedGEMM.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
7✔
123
        [](codegen::LanguageExtension& language_extension,
7✔
124
           const Function& function,
7✔
125
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
126
           const data_flow::LibraryNode& node) {
7✔
127
            return std::make_unique<blas::BatchedGEMMNodeDispatcher_CUBLASWithoutTransfers>(
×
128
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::BatchedGEMMNode&>(node)
×
129
            );
×
130
        }
×
131
    );
7✔
132

133

134
    // Softmax - CUDA with data transfers
135
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
136
        sdfg::math::tensor::LibraryNodeType_Softmax.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
7✔
137
        [](codegen::LanguageExtension& language_extension,
7✔
138
           const Function& function,
7✔
139
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
140
           const data_flow::LibraryNode& node) {
7✔
141
            return std::make_unique<tensor::SoftmaxNodeDispatcher_CUDAWithTransfers>(
2✔
142
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::math::tensor::SoftmaxNode&>(node)
2✔
143
            );
2✔
144
        }
2✔
145
    );
7✔
146
    // Softmax - CUDA without data transfers
147
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
148
        sdfg::math::tensor::LibraryNodeType_Softmax.value() +
7✔
149
            "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
7✔
150
        [](codegen::LanguageExtension& language_extension,
7✔
151
           const Function& function,
7✔
152
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
153
           const data_flow::LibraryNode& node) {
7✔
154
            return std::make_unique<tensor::SoftmaxNodeDispatcher_CUDAWithoutTransfers>(
3✔
155
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::math::tensor::SoftmaxNode&>(node)
3✔
156
            );
3✔
157
        }
3✔
158
    );
7✔
159

160

161
    // Memset - CUDA with data transfers
162
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
163
        sdfg::stdlib::LibraryNodeType_Memset.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
7✔
164
        [](codegen::LanguageExtension& language_extension,
7✔
165
           const Function& function,
7✔
166
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
167
           const data_flow::LibraryNode& node) {
7✔
168
            return std::make_unique<cuda::stdlib::MemsetNodeDispatcher_CUDAWithTransfers>(
×
169
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemsetNode&>(node)
×
170
            );
×
171
        }
×
172
    );
7✔
173
    // Memset - CUDA without data transfers
174
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
175
        sdfg::stdlib::LibraryNodeType_Memset.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
7✔
176
        [](codegen::LanguageExtension& language_extension,
7✔
177
           const Function& function,
7✔
178
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
179
           const data_flow::LibraryNode& node) {
7✔
180
            return std::make_unique<cuda::stdlib::MemsetNodeDispatcher_CUDAWithoutTransfers>(
×
181
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemsetNode&>(node)
×
182
            );
×
183
        }
×
184
    );
7✔
185

186

187
    // Memcpy - CUDA with data transfers
188
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
189
        sdfg::stdlib::LibraryNodeType_Memcpy.value() + "::" + cuda::ImplementationType_CUDAWithTransfers.value(),
7✔
190
        [](codegen::LanguageExtension& language_extension,
7✔
191
           const Function& function,
7✔
192
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
193
           const data_flow::LibraryNode& node) {
7✔
194
            return std::make_unique<cuda::stdlib::MemcpyNodeDispatcher_CUDAWithTransfers>(
×
195
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemcpyNode&>(node)
×
196
            );
×
197
        }
×
198
    );
7✔
199
    // Memcpy - CUDA without data transfers
200
    libNodeDispatcherRegistry.register_library_node_dispatcher(
7✔
201
        sdfg::stdlib::LibraryNodeType_Memcpy.value() + "::" + cuda::ImplementationType_CUDAWithoutTransfers.value(),
7✔
202
        [](codegen::LanguageExtension& language_extension,
7✔
203
           const Function& function,
7✔
204
           const data_flow::DataFlowGraph& data_flow_graph,
7✔
205
           const data_flow::LibraryNode& node) {
7✔
206
            return std::make_unique<cuda::stdlib::MemcpyNodeDispatcher_CUDAWithoutTransfers>(
×
207
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemcpyNode&>(node)
×
208
            );
×
209
        }
×
210
    );
7✔
211

212

213
    context.scheduler_registry
7✔
214
        .register_loop_scheduler<passes::scheduler::CUDAScheduler>(passes::scheduler::CUDAScheduler::target());
7✔
215
}
7✔
216

217
} // namespace sdfg::cuda
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc