• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28108702565

24 Jun 2026 03:10PM UTC coverage: 61.582% (-0.3%) from 61.844%
28108702565

push

github

web-flow
Merge pull request #808 from daisytuner/CatchUpRocm

enable batched gemm and memcpy on AMD GPUs analogous to the existing CUDA implementation

58 of 329 new or added lines in 6 files covered. (17.63%)

8 existing lines in 3 files now uncovered.

37576 of 61018 relevant lines covered (61.58%)

999.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

69.8
/opt/src/targets/rocm/plugin.cpp
1
#include "sdfg/targets/rocm/plugin.h"
2

3
namespace sdfg::rocm {
4

5
// Registers the ROCm target with the given plugin context.
//
// Registrations performed, in order:
//   - a map dispatcher keyed by ScheduleType_ROCM::value()
//   - a dispatcher and a serializer for the ROCm offloading library node
//   - library-node dispatchers for DOT, GEMM, BatchedGEMM, Memset and Memcpy, each under both
//     ImplementationType_ROCMWithTransfers and ImplementationType_ROCMWithoutTransfers
//   - the ROCMScheduler loop scheduler
//
// Library-node dispatcher keys are built as "<node type>::<implementation type>".
// The BLAS/stdlib factories downcast the generic LibraryNode with a reference dynamic_cast,
// which throws std::bad_cast if the node is not of the expected concrete type.
//
// NOTE(review): this listing is a coverage report; the bare numbers and the "N✔", "×" and
// "NEW" lines interleaved with the code are report gutter, not part of plugin.cpp.
void register_rocm_plugin(plugins::Context& context) {
3✔
6
    // Local aliases for the registries of `context` that are filled in below.
    auto& libNodeDispatcherRegistry = context.library_node_dispatcher_registry;
3✔
7
    auto& mapDispatcherRegistry = context.map_dispatcher_registry;
3✔
8
    auto& libNodeSerRegistry = context.library_node_serializer_registry;
3✔
9

10
    // Map dispatcher: the factory forwards its arguments to a new ROCMMapDispatcher.
    mapDispatcherRegistry.register_map_dispatcher(
3✔
11
        ScheduleType_ROCM::value(),
3✔
12
        [](codegen::LanguageExtension& language_extension,
3✔
13
           StructuredSDFG& sdfg,
3✔
14
           analysis::AnalysisManager& analysis_manager,
3✔
15
           structured_control_flow::Map& node,
3✔
16
           codegen::InstrumentationPlan& instrumentation_plan,
3✔
17
           codegen::ArgCapturePlan& arg_capture_plan) {
3✔
18
            return std::make_unique<ROCMMapDispatcher>(
×
19
                language_extension, sdfg, analysis_manager, node, instrumentation_plan, arg_capture_plan
×
20
            );
×
21
        }
×
22
    );
3✔
23

24
    // ROCm offloading node: dispatcher registered under ImplementationType_NONE.
    // The node is passed through as a generic LibraryNode (no downcast here).
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
25
        rocm::LibraryNodeType_ROCM_Offloading.value() + "::" + data_flow::ImplementationType_NONE.value(),
3✔
26
        [](codegen::LanguageExtension& language_extension,
3✔
27
           const Function& function,
3✔
28
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
29
           const data_flow::LibraryNode& node) {
3✔
30
            return std::make_unique<
×
31
                rocm::ROCMDataOffloadingNodeDispatcher>(language_extension, function, data_flow_graph, node);
×
32
        }
×
33
    );
3✔
34

35
    // Serializer factory for the ROCm offloading node type.
    libNodeSerRegistry.register_library_node_serializer(rocm::LibraryNodeType_ROCM_Offloading.value(), []() {
3✔
36
        return std::make_unique<rocm::ROCMDataOffloadingNodeSerializer>();
×
37
    });
×
38

39

40
    // Dot - ROCMBLAS with data transfers
41
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
42
        math::blas::LibraryNodeType_DOT.value() + "::" + rocm::ImplementationType_ROCMWithTransfers.value(),
3✔
43
        [](codegen::LanguageExtension& language_extension,
3✔
44
           const Function& function,
3✔
45
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
46
           const data_flow::LibraryNode& node) {
3✔
47
            return std::make_unique<blas::DotNodeDispatcher_ROCMBLASWithTransfers>(
×
48
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::DotNode&>(node)
×
49
            );
×
50
        }
×
51
    );
3✔
52
    // Dot - ROCMBLAS without data transfers
53
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
54
        math::blas::LibraryNodeType_DOT.value() + "::" + rocm::ImplementationType_ROCMWithoutTransfers.value(),
3✔
55
        [](codegen::LanguageExtension& language_extension,
3✔
56
           const Function& function,
3✔
57
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
58
           const data_flow::LibraryNode& node) {
3✔
59
            return std::make_unique<blas::DotNodeDispatcher_ROCMBLASWithoutTransfers>(
×
60
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::DotNode&>(node)
×
61
            );
×
62
        }
×
63
    );
3✔
64

65
    // GEMM - ROCMBLAS with data transfers
66
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
67
        math::blas::LibraryNodeType_GEMM.value() + "::" + rocm::ImplementationType_ROCMWithTransfers.value(),
3✔
68
        [](codegen::LanguageExtension& language_extension,
3✔
69
           const Function& function,
3✔
70
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
71
           const data_flow::LibraryNode& node) {
3✔
72
            return std::make_unique<blas::GEMMNodeDispatcher_ROCMBLASWithTransfers>(
×
73
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::GEMMNode&>(node)
×
74
            );
×
75
        }
×
76
    );
3✔
77

78
    // GEMM - ROCM without data transfers (data already on GPU)
    // Precision BLAS_Precision::s selects the hand-tuned kernel; every other precision
    // falls back to ROCMBLAS. The lambda spells out its return type because the two
    // branches return unique_ptrs to different dispatcher classes.
    // NOTE(review): BLAS_Precision::s is presumably single precision - confirm.
79
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
80
        math::blas::LibraryNodeType_GEMM.value() + "::" + rocm::ImplementationType_ROCMWithoutTransfers.value(),
3✔
81
        [](codegen::LanguageExtension& language_extension,
3✔
82
           const Function& function,
3✔
83
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
84
           const data_flow::LibraryNode& node) -> std::unique_ptr<codegen::LibraryNodeDispatcher> {
3✔
85
            auto& library_node = dynamic_cast<const math::blas::GEMMNode&>(node);
2✔
86
            if (library_node.precision() != math::blas::BLAS_Precision::s) {
2✔
87
                return std::make_unique<blas::GEMMNodeDispatcher_ROCMBLASWithoutTransfers>(
1✔
88
                    language_extension, function, data_flow_graph, library_node
1✔
89
                );
1✔
90
            } else {
1✔
91
                return std::make_unique<
1✔
92
                    blas::GEMMNodeDispatcher_ROCMHandTuned>(language_extension, function, data_flow_graph, library_node);
1✔
93
            }
1✔
94
        }
2✔
95
    );
3✔
96

97
    // BatchedGEMM - ROCMBLAS with data transfers
98
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
99
        math::blas::LibraryNodeType_BatchedGEMM.value() + "::" + rocm::ImplementationType_ROCMWithTransfers.value(),
3✔
100
        [](codegen::LanguageExtension& language_extension,
3✔
101
           const Function& function,
3✔
102
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
103
           const data_flow::LibraryNode& node) {
3✔
NEW
104
            return std::make_unique<blas::BatchedGEMMNodeDispatcher_ROCMBLASWithTransfers>(
×
NEW
105
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::BatchedGEMMNode&>(node)
×
NEW
106
            );
×
NEW
107
        }
×
108
    );
3✔
109
    // BatchedGEMM - ROCMBLAS without data transfers
110
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
111
        math::blas::LibraryNodeType_BatchedGEMM.value() + "::" + rocm::ImplementationType_ROCMWithoutTransfers.value(),
3✔
112
        [](codegen::LanguageExtension& language_extension,
3✔
113
           const Function& function,
3✔
114
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
115
           const data_flow::LibraryNode& node) {
3✔
NEW
116
            return std::make_unique<blas::BatchedGEMMNodeDispatcher_ROCMBLASWithoutTransfers>(
×
NEW
117
                language_extension, function, data_flow_graph, dynamic_cast<const math::blas::BatchedGEMMNode&>(node)
×
NEW
118
            );
×
NEW
119
        }
×
120
    );
3✔
121

122

123
    // Memset - ROCM with data transfers
124
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
125
        sdfg::stdlib::LibraryNodeType_Memset.value() + "::" + rocm::ImplementationType_ROCMWithTransfers.value(),
3✔
126
        [](codegen::LanguageExtension& language_extension,
3✔
127
           const Function& function,
3✔
128
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
129
           const data_flow::LibraryNode& node) {
3✔
130
            return std::make_unique<rocm::stdlib::MemsetNodeDispatcher_ROCMWithTransfers>(
×
131
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemsetNode&>(node)
×
132
            );
×
133
        }
×
134
    );
3✔
135
    // Memset - ROCM without data transfers
136
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
137
        sdfg::stdlib::LibraryNodeType_Memset.value() + "::" + rocm::ImplementationType_ROCMWithoutTransfers.value(),
3✔
138
        [](codegen::LanguageExtension& language_extension,
3✔
139
           const Function& function,
3✔
140
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
141
           const data_flow::LibraryNode& node) {
3✔
142
            return std::make_unique<rocm::stdlib::MemsetNodeDispatcher_ROCMWithoutTransfers>(
×
143
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemsetNode&>(node)
×
144
            );
×
145
        }
×
146
    );
3✔
147

148

149
    // Memcpy - ROCM with data transfers
150
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
151
        sdfg::stdlib::LibraryNodeType_Memcpy.value() + "::" + rocm::ImplementationType_ROCMWithTransfers.value(),
3✔
152
        [](codegen::LanguageExtension& language_extension,
3✔
153
           const Function& function,
3✔
154
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
155
           const data_flow::LibraryNode& node) {
3✔
NEW
156
            return std::make_unique<rocm::stdlib::MemcpyNodeDispatcher_ROCMWithTransfers>(
×
NEW
157
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemcpyNode&>(node)
×
NEW
158
            );
×
NEW
159
        }
×
160
    );
3✔
161
    // Memcpy - ROCM without data transfers
162
    libNodeDispatcherRegistry.register_library_node_dispatcher(
3✔
163
        sdfg::stdlib::LibraryNodeType_Memcpy.value() + "::" + rocm::ImplementationType_ROCMWithoutTransfers.value(),
3✔
164
        [](codegen::LanguageExtension& language_extension,
3✔
165
           const Function& function,
3✔
166
           const data_flow::DataFlowGraph& data_flow_graph,
3✔
167
           const data_flow::LibraryNode& node) {
3✔
NEW
168
            return std::make_unique<rocm::stdlib::MemcpyNodeDispatcher_ROCMWithoutTransfers>(
×
NEW
169
                language_extension, function, data_flow_graph, dynamic_cast<const sdfg::stdlib::MemcpyNode&>(node)
×
NEW
170
            );
×
NEW
171
        }
×
172
    );
3✔
173

174

175
    // Loop scheduler, registered under the value returned by ROCMScheduler::target().
    context.scheduler_registry
3✔
176
        .register_loop_scheduler<passes::scheduler::ROCMScheduler>(passes::scheduler::ROCMScheduler::target());
3✔
177
}
3✔
178

179
} // namespace sdfg::rocm
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc