• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28148559604

24 Jun 2026 03:10PM UTC coverage: 61.582% (-0.3%) from 61.844%
28148559604

push

github

web-flow
Merge pull request #808 from daisytuner/CatchUpRocm

enable batched gemm and memcpy on AMD GPUs analogous to the existing CUDA implementation

58 of 329 new or added lines in 6 files covered. (17.63%)

8 existing lines in 3 files now uncovered.

37576 of 61018 relevant lines covered (61.58%)

999.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

57.24
/opt/src/transformations/offloading/rocm_stdlib_data_transfer_extraction.cpp
1
#include "sdfg/transformations/offloading/rocm_stdlib_data_transfer_extraction.h"
2

3
#include <cassert>
4
#include <string>
5
#include <unordered_map>
6

7
#include "sdfg/analysis/analysis.h"
8
#include "sdfg/builder/structured_sdfg_builder.h"
9
#include "sdfg/data_flow/access_node.h"
10
#include "sdfg/data_flow/library_nodes/stdlib/memcpy.h"
11
#include "sdfg/data_flow/library_nodes/stdlib/memset.h"
12
#include "sdfg/exceptions.h"
13
#include "sdfg/structured_control_flow/block.h"
14
#include "sdfg/structured_control_flow/sequence.h"
15
#include "sdfg/symbolic/symbolic.h"
16
#include "sdfg/targets/rocm/rocm.h"
17
#include "sdfg/targets/rocm/rocm_data_offloading_node.h"
18
#include "sdfg/types/type.h"
19
#include "sdfg/types/utils.h"
20
#include "symengine/symengine_rcp.h"
21

22
namespace sdfg {
23
namespace rocm {
24

25
std::string ROCMStdlibDataTransferExtraction::create_device_container(
26
    builder::StructuredSDFGBuilder& builder, const types::Pointer& type, const symbolic::Expression& size
27
) {
2✔
28
    auto new_type = type.clone();
2✔
29
    new_type->storage_type(types::StorageType(
2✔
30
        "AMD_Generic", size, types::StorageType::AllocationType::Unmanaged, types::StorageType::AllocationType::Unmanaged
2✔
31
    ));
2✔
32
    auto device_container = builder.find_new_name(ROCM_DEVICE_PREFIX);
2✔
33
    builder.add_container(device_container, *new_type);
2✔
34
    return device_container;
2✔
35
}
2✔
36

37
void ROCMStdlibDataTransferExtraction::create_allocate(
38
    builder::StructuredSDFGBuilder& builder,
39
    structured_control_flow::Sequence& sequence,
40
    structured_control_flow::Block& block,
41
    const std::string& device_container,
42
    const symbolic::Expression& size,
43
    const types::Pointer& type
44
) {
2✔
45
    auto& alloc_block = builder.add_block_before(sequence, block, {}, block.debug_info());
2✔
46
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
2✔
47
        builder,
2✔
48
        alloc_block,
2✔
49
        device_container,
2✔
50
        device_container,
2✔
51
        offloading::DataTransferDirection::NONE,
2✔
52
        offloading::BufferLifecycle::ALLOC,
2✔
53
        type,
2✔
54
        type,
2✔
55
        this->lib_node_.debug_info(),
2✔
56
        size,
2✔
57
        symbolic::zero()
2✔
58
    );
2✔
59
}
2✔
60

61
void ROCMStdlibDataTransferExtraction::create_deallocate(
62
    builder::StructuredSDFGBuilder& builder,
63
    structured_control_flow::Sequence& sequence,
64
    structured_control_flow::Block& block,
65
    const std::string& device_container,
66
    const types::Pointer& type
67
) {
×
68
    auto& dealloc_block = builder.add_block_after(sequence, block, {}, block.debug_info());
×
69
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
×
70
        builder,
×
71
        dealloc_block,
×
72
        device_container,
×
73
        device_container,
×
74
        offloading::DataTransferDirection::NONE,
×
75
        offloading::BufferLifecycle::FREE,
×
76
        type,
×
77
        type,
×
NEW
78
        this->lib_node_.debug_info(),
×
79
        SymEngine::null,
×
80
        symbolic::zero()
×
81
    );
×
82
}
×
83

84
void ROCMStdlibDataTransferExtraction::create_copy_from_device_with_deallocation(
85
    builder::StructuredSDFGBuilder& builder,
86
    structured_control_flow::Sequence& sequence,
87
    structured_control_flow::Block& block,
88
    const std::string& host_container,
89
    const std::string& device_container,
90
    const symbolic::Expression& size,
91
    const types::Pointer& type
92
) {
2✔
93
    auto& copy_block = builder.add_block_after(sequence, block, {}, block.debug_info());
2✔
94
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
2✔
95
        builder,
2✔
96
        copy_block,
2✔
97
        host_container,
2✔
98
        device_container,
2✔
99
        offloading::DataTransferDirection::D2H,
2✔
100
        offloading::BufferLifecycle::FREE,
2✔
101
        type,
2✔
102
        type,
2✔
103
        this->lib_node_.debug_info(),
2✔
104
        size,
2✔
105
        symbolic::zero()
2✔
106
    );
2✔
107
}
2✔
108

109
void ROCMStdlibDataTransferExtraction::create_copy_to_device_with_allocation(
110
    builder::StructuredSDFGBuilder& builder,
111
    structured_control_flow::Sequence& sequence,
112
    structured_control_flow::Block& block,
113
    const std::string& host_container,
114
    const std::string& device_container,
115
    const symbolic::Expression& size,
116
    const types::Pointer& type
NEW
117
) {
×
NEW
118
    auto& copy_block = builder.add_block_before(sequence, block, {}, block.debug_info());
×
NEW
119
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
×
NEW
120
        builder,
×
NEW
121
        copy_block,
×
NEW
122
        host_container,
×
NEW
123
        device_container,
×
NEW
124
        offloading::DataTransferDirection::H2D,
×
NEW
125
        offloading::BufferLifecycle::ALLOC,
×
NEW
126
        type,
×
NEW
127
        type,
×
NEW
128
        this->lib_node_.debug_info(),
×
NEW
129
        size,
×
NEW
130
        symbolic::zero()
×
NEW
131
    );
×
NEW
132
}
×
133

134
// Binds the transformation to the library node it will inspect and rewrite.
// Only the reference is stored.
ROCMStdlibDataTransferExtraction::ROCMStdlibDataTransferExtraction(data_flow::LibraryNode& lib_node)
    : lib_node_(lib_node)
{
}
8✔
136

137
// Returns the identifier of this transformation; to_json() writes the same
// string as "transformation_type".
std::string ROCMStdlibDataTransferExtraction::name() const
{
    return "ROCMStdlibDataTransferExtraction";
}
1✔
138

139
bool ROCMStdlibDataTransferExtraction::
140
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
7✔
141
    if (this->lib_node_.implementation_type().value() != rocm::ImplementationType_ROCMWithTransfers.value()) {
7✔
142
        return false;
4✔
143
    }
4✔
144

145
    // Restrict to nodes in their own block
146
    auto& dfg = this->lib_node_.get_parent();
3✔
147
    if (dfg.nodes().size() != dfg.in_degree(this->lib_node_) + dfg.out_degree(this->lib_node_) + 1) {
3✔
148
        return false;
×
149
    }
×
150

151
    // Supported stdlib nodes
152
    if (dynamic_cast<stdlib::MemsetNode*>(&this->lib_node_)) {
3✔
153
        return true;
3✔
154
    } else if (dynamic_cast<stdlib::MemcpyNode*>(&this->lib_node_)) {
3✔
NEW
155
        return true;
×
NEW
156
    } else {
×
NEW
157
        return false;
×
NEW
158
    }
×
159
}
3✔
160

161
void ROCMStdlibDataTransferExtraction::
162
    apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
2✔
163
    // Get data flow graph and block
164
    auto& dfg = this->lib_node_.get_parent();
2✔
165
    auto* block = dynamic_cast<structured_control_flow::Block*>(dfg.get_parent());
2✔
166
    assert(block);
2✔
167

168
    // Get sequence
169
    auto* sequence = dynamic_cast<structured_control_flow::Sequence*>(block->get_parent());
2✔
170
    assert(sequence);
2✔
171

172
    if (dynamic_cast<stdlib::MemsetNode*>(&this->lib_node_)) {
2✔
173
        this->apply_memset(builder, analysis_manager, dfg, *sequence, *block);
2✔
174
    } else if (dynamic_cast<stdlib::MemcpyNode*>(&this->lib_node_)) {
2✔
NEW
175
        this->apply_memcpy(builder, analysis_manager, dfg, *sequence, *block);
×
NEW
176
    }
×
177

178
    // Change the implementation type to without transfers
179
    this->lib_node_.implementation_type() = rocm::ImplementationType_ROCMWithoutTransfers;
2✔
180
}
2✔
181

182
void ROCMStdlibDataTransferExtraction::apply_memset(
183
    builder::StructuredSDFGBuilder& builder,
184
    analysis::AnalysisManager& analysis_manager,
185
    data_flow::DataFlowGraph& dfg,
186
    structured_control_flow::Sequence& sequence,
187
    structured_control_flow::Block& block
188
) {
2✔
189
    auto& memset_node = static_cast<stdlib::MemsetNode&>(this->lib_node_);
2✔
190

191
    // Capture output accesses
192
    auto ptr_edge = dfg.in_edge_for_connector(memset_node, "_ptr");
2✔
193
    auto& host_access_node =
2✔
194
        const_cast<data_flow::AccessNode&>(static_cast<const data_flow::AccessNode&>(ptr_edge->src()));
2✔
195
    auto& host_container_name = host_access_node.data();
2✔
196

197
    // Use the host container's actual type to avoid type mismatches
198
    auto& host_type = builder.subject().type(host_container_name);
2✔
199
    auto& type = static_cast<const types::Pointer&>(host_type);
2✔
200

201
    auto ptr_size = memset_node.num();
2✔
202
    auto dPtr = this->create_device_container(builder, type, ptr_size);
2✔
203

204
    // Allocate device buffer
205
    this->create_allocate(builder, sequence, block, dPtr, ptr_size, type);
2✔
206

207
    // Copy from device to host and deallocate
208
    this->create_copy_from_device_with_deallocation(builder, sequence, block, host_container_name, dPtr, ptr_size, type);
2✔
209

210
    // Redirect output to device container
211
    host_access_node.data(dPtr);
2✔
212
}
2✔
213

214
void ROCMStdlibDataTransferExtraction::apply_memcpy(
215
    builder::StructuredSDFGBuilder& builder,
216
    analysis::AnalysisManager& analysis_manager,
217
    data_flow::DataFlowGraph& dfg,
218
    structured_control_flow::Sequence& sequence,
219
    structured_control_flow::Block& block
NEW
220
) {
×
NEW
221
    auto& memcpy_node = static_cast<stdlib::MemcpyNode&>(this->lib_node_);
×
NEW
222
    auto ptr_size = memcpy_node.count();
×
223

224
    // Handle _src (read) - need H2D transfer
NEW
225
    auto src_edge = dfg.in_edge_for_connector(memcpy_node, "_src");
×
NEW
226
    auto& src_access_node = const_cast<data_flow::AccessNode&>(static_cast<const data_flow::AccessNode&>(src_edge->src()
×
NEW
227
    ));
×
NEW
228
    auto& src_container_name = src_access_node.data();
×
NEW
229
    auto& src_type = static_cast<const types::Pointer&>(builder.subject().type(src_container_name));
×
230

NEW
231
    auto dSrc = this->create_device_container(builder, src_type, ptr_size);
×
NEW
232
    this->create_copy_to_device_with_allocation(builder, sequence, block, src_container_name, dSrc, ptr_size, src_type);
×
NEW
233
    this->create_deallocate(builder, sequence, block, dSrc, src_type);
×
NEW
234
    src_access_node.data(dSrc);
×
235

236
    // Handle _dst (write) - need D2H transfer
NEW
237
    auto dst_edge = dfg.in_edge_for_connector(memcpy_node, "_dst");
×
NEW
238
    auto& dst_access_node = const_cast<data_flow::AccessNode&>(static_cast<const data_flow::AccessNode&>(dst_edge->src()
×
NEW
239
    ));
×
NEW
240
    auto& dst_container_name = dst_access_node.data();
×
NEW
241
    auto& dst_type = static_cast<const types::Pointer&>(builder.subject().type(dst_container_name));
×
242

NEW
243
    auto dDst = this->create_device_container(builder, dst_type, ptr_size);
×
NEW
244
    this->create_allocate(builder, sequence, block, dDst, ptr_size, dst_type);
×
NEW
245
    this->create_copy_from_device_with_deallocation(builder, sequence, block, dst_container_name, dDst, ptr_size, dst_type);
×
NEW
246
    dst_access_node.data(dDst);
×
UNCOV
247
}
×
248

249
// Serializes this transformation into `j`:
//   "transformation_type": name()
//   "parameters":          an empty JSON object
//   "subgraph":            { "0": { "element_id": <node element id>, "type": "unknown" } }
void ROCMStdlibDataTransferExtraction::to_json(nlohmann::json& j) const {
    // Description of the single library node this transformation targets.
    nlohmann::json node_entry = nlohmann::json::object();
    node_entry["element_id"] = lib_node_.element_id();
    node_entry["type"] = "unknown";

    nlohmann::json subgraph = nlohmann::json::object();
    subgraph["0"] = node_entry;

    j["transformation_type"] = name();
    j["parameters"] = nlohmann::json::object();
    j["subgraph"] = subgraph;
}
1✔
254

255
} // namespace rocm
256
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc