• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28302975093

27 Jun 2026 09:55PM UTC coverage: 61.929% (+0.2%) from 61.754%
28302975093

Pull #814

github

web-flow
Merge d9618e50c into 8322f5994
Pull Request #814: Adds GPU reduce dispatchers

567 of 854 new or added lines in 16 files covered. (66.39%)

17 existing lines in 1 file now uncovered.

39450 of 63702 relevant lines covered (61.93%)

968.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

69.94
/opt/src/targets/rocm/plugin.cpp
1
#include "sdfg/targets/rocm/plugin.h"
2

3
#include "sdfg/targets/rocm/rocm_reduce_dispatcher.h"
4

5
namespace sdfg::rocm {
6

7
void register_rocm_plugin(plugins::Context& context) {
3✔
8
    auto& libNodeDispatcherRegistry = context.library_node_dispatcher_registry;
3✔
9
    auto& mapDispatcherRegistry = context.map_dispatcher_registry;
3✔
10
    auto& reduceDispatcherRegistry = context.reduce_dispatcher_registry;
3✔
11
    auto& libNodeSerRegistry = context.library_node_serializer_registry;
3✔
12

13
    mapDispatcherRegistry.register_map_dispatcher(
3✔
14
        ScheduleType_ROCM::value(),
3✔
15
        [](codegen::LanguageExtension& language_extension,
3✔
16
           StructuredSDFG& sdfg,
3✔
17
           analysis::AnalysisManager& analysis_manager,
3✔
18
           structured_control_flow::Map& node,
3✔
19
           codegen::InstrumentationPlan& instrumentation_plan,
3✔
20
           codegen::ArgCapturePlan& arg_capture_plan) {
3✔
21
            return std::make_unique<ROCMMapDispatcher>(
×
22
                language_extension, sdfg, analysis_manager, node, instrumentation_plan, arg_capture_plan
×
23
            );
×
24
        }
×
25
    );
3✔
26

27
    reduceDispatcherRegistry.register_reduce_dispatcher(
3✔
28
        ScheduleType_ROCM::value(),
3✔
29
        [](codegen::LanguageExtension& language_extension,
3✔
30
           StructuredSDFG& sdfg,
3✔
31
           analysis::AnalysisManager& analysis_manager,
3✔
32
           structured_control_flow::Reduce& node,
3✔
33
           codegen::InstrumentationPlan& instrumentation_plan,
3✔
34
           codegen::ArgCapturePlan& arg_capture_plan) {
3✔
NEW
35
            return std::make_unique<ROCMReduceDispatcher>(
×
NEW
36
                language_extension, sdfg, analysis_manager, node, instrumentation_plan, arg_capture_plan
×
NEW
37
            );
×
NEW
38
        }
×
39
    );
3✔
40

41
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
42
        rocm::LibraryNodeType_ROCM_Offloading.value() + "::" + data_flow::ImplementationType_NONE.value(),
3✔
43
        [](codegen::LanguageExtension& language_extension,
3✔
44
           const Function& function,
3✔
45
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
46
           const data_flow::LibraryNode& node) {
3✔
47
            return std::make_unique<
×
48
                rocm::ROCMDataOffloadingNodeDispatcher>(language_extension, function, data_flow_graph, node);
×
49
        }
×
50
    );
3✔
51

52
    libNodeSerRegistry.register_library_node_serializer(rocm::LibraryNodeType_ROCM_Offloading.value(), []() {
3✔
53
        return std::make_unique<rocm::ROCMDataOffloadingNodeSerializer>();
×
54
    });
×
55

56

57
    // Dot - ROCMBLAS with data transfers
58
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
59
        math::blas::LibraryNodeType_DOT.value() + "::" + rocm::ImplementationType_ROCMWithTransfers.value(),
3✔
60
        [](codegen::LanguageExtension& language_extension,
3✔
61
           const Function& function,
3✔
62
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
63
           const data_flow::LibraryNode& node) {
3✔
64
            return std::make_unique<blas::DotNodeDispatcher_ROCMBLASWithTransfers>(
×
65
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::DotNode&>(node)
×
66
            );
×
67
        }
×
68
    );
3✔
69
    // Dot - ROCMBLAS without data transfers
70
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
71
        math::blas::LibraryNodeType_DOT.value() + "::" + rocm::ImplementationType_ROCMWithoutTransfers.value(),
3✔
72
        [](codegen::LanguageExtension& language_extension,
3✔
73
           const Function& function,
3✔
74
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
75
           const data_flow::LibraryNode& node) {
3✔
76
            return std::make_unique<blas::DotNodeDispatcher_ROCMBLASWithoutTransfers>(
×
77
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::DotNode&>(node)
×
78
            );
×
79
        }
×
80
    );
3✔
81

82
    // GEMM - ROCMBLAS with data transfers
83
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
84
        math::blas::LibraryNodeType_GEMM.value() + "::" + rocm::ImplementationType_ROCMWithTransfers.value(),
3✔
85
        [](codegen::LanguageExtension& language_extension,
3✔
86
           const Function& function,
3✔
87
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
88
           const data_flow::LibraryNode& node) {
3✔
89
            return std::make_unique<blas::GEMMNodeDispatcher_ROCMBLASWithTransfers>(
×
90
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::GEMMNode&>(node)
×
91
            );
×
92
        }
×
93
    );
3✔
94

95
    // GEMM - without data transfers (data already on GPU): ROCM hand-tuned kernel for precision `s`, ROCMBLAS otherwise
96
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
97
        math::blas::LibraryNodeType_GEMM.value() + "::" + rocm::ImplementationType_ROCMWithoutTransfers.value(),
3✔
98
        [](codegen::LanguageExtension& language_extension,
3✔
99
           const Function& function,
3✔
100
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
101
           const data_flow::LibraryNode& node) -> std::unique_ptr<codegen::LibraryNodeDispatcher> {
3✔
102
            auto& library_node = dynamic_cast<const math::blas::GEMMNode&>(node);
2✔
103
            if (library_node.precision() != math::blas::BLAS_Precision::s) {
2✔
104
                return std::make_unique<blas::GEMMNodeDispatcher_ROCMBLASWithoutTransfers>(
1✔
105
                    language_extension, function, data_flow_graph, library_node
1✔
106
                );
1✔
107
            } else {
1✔
108
                return std::make_unique<
1✔
109
                    blas::GEMMNodeDispatcher_ROCMHandTuned>(language_extension, function, data_flow_graph, library_node);
1✔
110
            }
1✔
111
        }
2✔
112
    );
3✔
113

114
    // BatchedGEMM - ROCMBLAS with data transfers
115
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
116
        math::blas::LibraryNodeType_BatchedGEMM.value() + "::" + rocm::ImplementationType_ROCMWithTransfers.value(),
3✔
117
        [](codegen::LanguageExtension& language_extension,
3✔
118
           const Function& function,
3✔
119
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
120
           const data_flow::LibraryNode& node) {
3✔
121
            return std::make_unique<blas::BatchedGEMMNodeDispatcher_ROCMBLASWithTransfers>(
×
122
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::BatchedGEMMNode&>(node)
×
123
            );
×
124
        }
×
125
    );
3✔
126
    // BatchedGEMM - ROCMBLAS without data transfers
127
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
128
        math::blas::LibraryNodeType_BatchedGEMM.value() + "::" + rocm::ImplementationType_ROCMWithoutTransfers.value(),
3✔
129
        [](codegen::LanguageExtension& language_extension,
3✔
130
           const Function& function,
3✔
131
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
132
           const data_flow::LibraryNode& node) {
3✔
133
            return std::make_unique<blas::BatchedGEMMNodeDispatcher_ROCMBLASWithoutTransfers>(
×
134
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::BatchedGEMMNode&>(node)
×
135
            );
×
136
        }
×
137
    );
3✔
138

139

140
    // Memset - ROCM with data transfers
141
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
142
        sdfg::stdlib::LibraryNodeType_Memset.value() + "::" + rocm::ImplementationType_ROCMWithTransfers.value(),
3✔
143
        [](codegen::LanguageExtension& language_extension,
3✔
144
           const Function& function,
3✔
145
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
146
           const data_flow::LibraryNode& node) {
3✔
147
            return std::make_unique<rocm::stdlib::MemsetNodeDispatcher_ROCMWithTransfers>(
×
148
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemsetNode&>(node)
×
149
            );
×
150
        }
×
151
    );
3✔
152
    // Memset - ROCM without data transfers
153
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
154
        sdfg::stdlib::LibraryNodeType_Memset.value() + "::" + rocm::ImplementationType_ROCMWithoutTransfers.value(),
3✔
155
        [](codegen::LanguageExtension& language_extension,
3✔
156
           const Function& function,
3✔
157
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
158
           const data_flow::LibraryNode& node) {
3✔
159
            return std::make_unique<rocm::stdlib::MemsetNodeDispatcher_ROCMWithoutTransfers>(
×
160
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemsetNode&>(node)
×
161
            );
×
162
        }
×
163
    );
3✔
164

165

166
    // Memcpy - ROCM with data transfers
167
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
168
        sdfg::stdlib::LibraryNodeType_Memcpy.value() + "::" + rocm::ImplementationType_ROCMWithTransfers.value(),
3✔
169
        [](codegen::LanguageExtension& language_extension,
3✔
170
           const Function& function,
3✔
171
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
172
           const data_flow::LibraryNode& node) {
3✔
173
            return std::make_unique<rocm::stdlib::MemcpyNodeDispatcher_ROCMWithTransfers>(
×
174
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemcpyNode&>(node)
×
175
            );
×
176
        }
×
177
    );
3✔
178
    // Memcpy - ROCM without data transfers
179
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
180
        sdfg::stdlib::LibraryNodeType_Memcpy.value() + "::" + rocm::ImplementationType_ROCMWithoutTransfers.value(),
3✔
181
        [](codegen::LanguageExtension& language_extension,
3✔
182
           const Function& function,
3✔
183
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
184
           const data_flow::LibraryNode& node) {
3✔
185
            return std::make_unique<rocm::stdlib::MemcpyNodeDispatcher_ROCMWithoutTransfers>(
×
186
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemcpyNode&>(node)
×
187
            );
×
188
        }
×
189
    );
3✔
190

191

192
    context.scheduler_registry
3✔
193
        .register_loop_scheduler<passes::scheduler::ROCMScheduler>(passes::scheduler::ROCMScheduler::target());
3✔
194
}
3✔
195

196
} // namespace sdfg::rocm
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc