• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 26463753889

26 May 2026 05:18PM UTC coverage: 60.864% (-0.02%) from 60.886%
26463753889

Pull #719

github

web-flow
Merge 0b90ddd88 into 707dadcf8
Pull Request #719: Libnode ptr edges

961 of 1749 new or added lines in 52 files covered. (54.95%)

90 existing lines in 29 files now uncovered.

35222 of 57870 relevant lines covered (60.86%)

11043.61 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.04
/opt/src/transformations/offloading/rocblas_data_transfer_extraction.cpp
1
#include "sdfg/transformations/offloading/rocblas_data_transfer_extraction.h"
2

3
#include <cassert>
4
#include <cstddef>
5
#include <nlohmann/json_fwd.hpp>
6
#include <string>
7
#include <unordered_map>
8

9
#include "sdfg/analysis/analysis.h"
10
#include "sdfg/analysis/scope_analysis.h"
11
#include "sdfg/builder/structured_sdfg_builder.h"
12
#include "sdfg/data_flow/access_node.h"
13
#include "sdfg/data_flow/library_nodes/math/blas/dot_node.h"
14
#include "sdfg/data_flow/library_nodes/math/blas/gemm_node.h"
15
#include "sdfg/data_flow/library_nodes/math/math.h"
16
#include "sdfg/element.h"
17
#include "sdfg/exceptions.h"
18
#include "sdfg/structured_control_flow/block.h"
19
#include "sdfg/structured_control_flow/sequence.h"
20
#include "sdfg/symbolic/symbolic.h"
21
#include "sdfg/targets/rocm/rocm.h"
22
#include "sdfg/targets/rocm/rocm_data_offloading_node.h"
23
#include "sdfg/transformations/transformation.h"
24
#include "sdfg/types/type.h"
25
#include "sdfg/types/utils.h"
26
#include "symengine/symengine_rcp.h"
27

28
namespace sdfg {
29
namespace rocm {
30

31
std::string ROCBLASDataTransferExtraction::create_device_container(
32
    builder::StructuredSDFGBuilder& builder, const types::Pointer& type, const symbolic::Expression& size
33
) {
5✔
34
    auto new_type = type.clone();
5✔
35
    new_type->storage_type(types::StorageType(
5✔
36
        "AMD_Generic", size, types::StorageType::AllocationType::Unmanaged, types::StorageType::AllocationType::Unmanaged
5✔
37
    ));
5✔
38
    auto device_container = builder.find_new_name(ROCM_DEVICE_PREFIX);
5✔
39
    builder.add_container(device_container, *new_type);
5✔
40
    return device_container;
5✔
41
}
5✔
42

43
void ROCBLASDataTransferExtraction::create_allocate(
44
    builder::StructuredSDFGBuilder& builder,
45
    structured_control_flow::Sequence& sequence,
46
    structured_control_flow::Block& block,
47
    const std::string& device_container,
48
    const symbolic::Expression& size,
49
    const types::Pointer& type
50
) {
×
51
    auto& alloc_block = builder.add_block_before(sequence, block, {}, block.debug_info());
×
NEW
52
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
×
NEW
53
        builder,
×
54
        alloc_block,
×
NEW
55
        device_container,
×
NEW
56
        device_container,
×
NEW
57
        offloading::DataTransferDirection::NONE,
×
NEW
58
        offloading::BufferLifecycle::ALLOC,
×
NEW
59
        type,
×
NEW
60
        type,
×
61
        this->blas_node_.debug_info(),
×
62
        size,
×
NEW
63
        symbolic::zero()
×
64
    );
×
65
}
×
66

67
void ROCBLASDataTransferExtraction::create_deallocate(
68
    builder::StructuredSDFGBuilder& builder,
69
    structured_control_flow::Sequence& sequence,
70
    structured_control_flow::Block& block,
71
    const std::string& device_container,
72
    const types::Pointer& type
73
) {
4✔
74
    auto& dealloc_block = builder.add_block_after(sequence, block, {}, block.debug_info());
4✔
75
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
4✔
76
        builder,
4✔
77
        dealloc_block,
4✔
78
        device_container,
4✔
79
        device_container,
4✔
80
        offloading::DataTransferDirection::NONE,
4✔
81
        offloading::BufferLifecycle::FREE,
4✔
82
        type,
4✔
83
        type,
4✔
84
        this->blas_node_.debug_info(),
4✔
85
        SymEngine::null,
4✔
86
        symbolic::zero()
4✔
87
    );
4✔
88
}
4✔
89

90
void ROCBLASDataTransferExtraction::create_copy_to_device(
91
    builder::StructuredSDFGBuilder& builder,
92
    structured_control_flow::Sequence& sequence,
93
    structured_control_flow::Block& block,
94
    const std::string& host_container,
95
    const std::string& device_container,
96
    const symbolic::Expression& size,
97
    const types::Pointer& type
98
) {
×
99
    auto& copy_block = builder.add_block_before(sequence, block, {}, block.debug_info());
×
NEW
100
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
×
NEW
101
        builder,
×
102
        copy_block,
×
NEW
103
        host_container,
×
NEW
104
        device_container,
×
NEW
105
        offloading::DataTransferDirection::H2D,
×
NEW
106
        offloading::BufferLifecycle::NO_CHANGE,
×
NEW
107
        type,
×
NEW
108
        type,
×
109
        this->blas_node_.debug_info(),
×
110
        size,
×
NEW
111
        symbolic::zero()
×
112
    );
×
113
}
×
114

115
void ROCBLASDataTransferExtraction::create_copy_from_device(
116
    builder::StructuredSDFGBuilder& builder,
117
    structured_control_flow::Sequence& sequence,
118
    structured_control_flow::Block& block,
119
    const std::string& host_container,
120
    const std::string& device_container,
121
    const symbolic::Expression& size,
122
    const types::Pointer& type
123
) {
×
124
    auto& copy_block = builder.add_block_after(sequence, block, {}, block.debug_info());
×
NEW
125
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
×
NEW
126
        builder,
×
127
        copy_block,
×
NEW
128
        host_container,
×
NEW
129
        device_container,
×
NEW
130
        offloading::DataTransferDirection::D2H,
×
NEW
131
        offloading::BufferLifecycle::NO_CHANGE,
×
NEW
132
        type,
×
NEW
133
        type,
×
134
        this->blas_node_.debug_info(),
×
135
        size,
×
NEW
136
        symbolic::zero()
×
137
    );
×
138
}
×
139

140
void ROCBLASDataTransferExtraction::create_copy_to_device_with_allocation(
141
    builder::StructuredSDFGBuilder& builder,
142
    structured_control_flow::Sequence& sequence,
143
    structured_control_flow::Block& block,
144
    const std::string& host_container,
145
    const std::string& device_container,
146
    const symbolic::Expression& size,
147
    const types::Pointer& type
148
) {
5✔
149
    auto& copy_block = builder.add_block_before(sequence, block, {}, block.debug_info());
5✔
150
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
5✔
151
        builder,
5✔
152
        copy_block,
5✔
153
        host_container,
5✔
154
        device_container,
5✔
155
        offloading::DataTransferDirection::H2D,
5✔
156
        offloading::BufferLifecycle::ALLOC,
5✔
157
        type,
5✔
158
        type,
5✔
159
        this->blas_node_.debug_info(),
5✔
160
        size,
5✔
161
        symbolic::zero()
5✔
162
    );
5✔
163
}
5✔
164

165
void ROCBLASDataTransferExtraction::create_copy_from_device_with_deallocation(
166
    builder::StructuredSDFGBuilder& builder,
167
    structured_control_flow::Sequence& sequence,
168
    structured_control_flow::Block& block,
169
    const std::string& host_container,
170
    const std::string& device_container,
171
    const symbolic::Expression& size,
172
    const types::Pointer& type
173
) {
1✔
174
    auto& copy_block = builder.add_block_after(sequence, block, {}, block.debug_info());
1✔
175
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
1✔
176
        builder,
1✔
177
        copy_block,
1✔
178
        host_container,
1✔
179
        device_container,
1✔
180
        offloading::DataTransferDirection::D2H,
1✔
181
        offloading::BufferLifecycle::FREE,
1✔
182
        type,
1✔
183
        type,
1✔
184
        this->blas_node_.debug_info(),
1✔
185
        size,
1✔
186
        symbolic::zero()
1✔
187
    );
1✔
188
}
1✔
189

190
// Creates the transformation for one BLAS library node; `blas_node_` is
// initialized from `blas_node` and is what can_be_applied()/apply() inspect.
ROCBLASDataTransferExtraction::ROCBLASDataTransferExtraction(math::blas::BLASNode& blas_node)
    : blas_node_(blas_node) {
}
10✔
191

192
// Returns the fixed identifier of this transformation.
std::string ROCBLASDataTransferExtraction::name() const {
    return "ROCBLASDataTransferExtraction";
}
4✔
193

194
bool ROCBLASDataTransferExtraction::
195
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
6✔
196
    // BLAS node must have implementation type ROCMBLAS with data transfers
197
    if (this->blas_node_.implementation_type().value() != rocm::ImplementationType_ROCMWithTransfers.value()) {
6✔
198
        return false;
2✔
199
    }
2✔
200

201
    // Restrict to BLAS nodes in their own block
202
    auto& dfg = this->blas_node_.get_parent();
4✔
203
    if (dfg.nodes().size() != dfg.in_degree(this->blas_node_) + dfg.out_degree(this->blas_node_) + 1) {
4✔
204
        return false;
×
205
    }
×
206

207
    // Supported BLAS nodes
208
    if (dynamic_cast<math::blas::DotNode*>(&this->blas_node_)) {
4✔
209
        return true;
2✔
210
    } else if (dynamic_cast<math::blas::GEMMNode*>(&this->blas_node_)) {
2✔
211
        return true;
2✔
212
    } else {
2✔
213
        return false;
×
214
    }
×
215
}
4✔
216

217
void ROCBLASDataTransferExtraction::
218
    apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
2✔
219
    // Get data flow graph and block
220
    auto& dfg = this->blas_node_.get_parent();
2✔
221
    auto* block = dynamic_cast<structured_control_flow::Block*>(dfg.get_parent());
2✔
222
    assert(block);
2✔
223

224
    // Get sequence
225
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
2✔
226
    auto* sequence = dynamic_cast<structured_control_flow::Sequence*>(scope_analysis.parent_scope(block));
2✔
227
    assert(sequence);
2✔
228

229
    // Determine type
230
    types::PrimitiveType precision;
2✔
231
    switch (this->blas_node_.precision()) {
2✔
232
        case math::blas::h:
×
233
            precision = types::PrimitiveType::Half;
×
234
            break;
×
235
        case math::blas::s:
1✔
236
            precision = types::PrimitiveType::Float;
1✔
237
            break;
1✔
238
        case math::blas::d:
1✔
239
            precision = types::PrimitiveType::Double;
1✔
240
            break;
1✔
241
        default:
×
242
            throw InvalidSDFGException("ROCBLASDataTransferExtraction: Unsupported precision");
×
243
    }
2✔
244
    types::Scalar base_type(precision);
2✔
245
    types::Pointer type(base_type);
2✔
246

247
    // Capture in and out accesses
248
    std::unordered_map<std::string, data_flow::AccessNode&> in_access, out_access;
2✔
249
    for (auto& iedge : dfg.in_edges(this->blas_node_)) {
7✔
250
        in_access.insert({iedge.dst_conn(), static_cast<data_flow::AccessNode&>(iedge.src())});
7✔
251
    }
7✔
252
    for (auto& oedge : dfg.out_edges(this->blas_node_)) {
2✔
253
        out_access.insert({oedge.src_conn(), static_cast<data_flow::AccessNode&>(oedge.dst())});
1✔
254
    }
1✔
255

256
    if (auto* dot_node = dynamic_cast<math::blas::DotNode*>(&this->blas_node_)) {
2✔
257
        auto x_size = symbolic::mul(
1✔
258
            symbolic::add(symbolic::mul(symbolic::sub(dot_node->n(), symbolic::one()), dot_node->incx()), symbolic::one()),
1✔
259
            types::get_contiguous_element_size(type, true)
1✔
260
        );
1✔
261
        auto y_size = symbolic::mul(
1✔
262
            symbolic::add(symbolic::mul(symbolic::sub(dot_node->n(), symbolic::one()), dot_node->incy()), symbolic::one()),
1✔
263
            types::get_contiguous_element_size(type, true)
1✔
264
        );
1✔
265
        auto dx = this->create_device_container(builder, type, x_size);
1✔
266
        auto dy = this->create_device_container(builder, type, y_size);
1✔
267

268
        this->create_copy_to_device_with_allocation(
1✔
269
            builder, *sequence, *block, in_access.at("__x").data(), dx, x_size, type
1✔
270
        );
1✔
271
        this->create_copy_to_device_with_allocation(
1✔
272
            builder, *sequence, *block, in_access.at("__y").data(), dy, y_size, type
1✔
273
        );
1✔
274

275
        this->create_deallocate(builder, *sequence, *block, dx, type);
1✔
276
        this->create_deallocate(builder, *sequence, *block, dy, type);
1✔
277

278
        in_access.at("__x").data(dx);
1✔
279
        in_access.at("__y").data(dy);
1✔
280
    } else if (auto* gemm_node = dynamic_cast<math::blas::GEMMNode*>(&this->blas_node_)) {
1✔
281
        auto elem_size = types::get_contiguous_element_size(type, true);
1✔
282
        auto a_size = symbolic::mul(symbolic::mul(gemm_node->m(), gemm_node->k()), elem_size);
1✔
283
        auto b_size = symbolic::mul(symbolic::mul(gemm_node->k(), gemm_node->n()), elem_size);
1✔
284
        auto c_size = symbolic::mul(symbolic::mul(gemm_node->m(), gemm_node->n()), elem_size);
1✔
285

286
        auto dA = this->create_device_container(builder, type, a_size);
1✔
287
        auto dB = this->create_device_container(builder, type, b_size);
1✔
288
        auto dC = this->create_device_container(builder, type, c_size);
1✔
289

290
        this->create_copy_to_device_with_allocation(
1✔
291
            builder, *sequence, *block, in_access.at("__A").data(), dA, a_size, type
1✔
292
        );
1✔
293
        this->create_copy_to_device_with_allocation(
1✔
294
            builder, *sequence, *block, in_access.at("__B").data(), dB, b_size, type
1✔
295
        );
1✔
296
        auto c_ptr = in_access.at("__C").data();
1✔
297
        this->create_copy_to_device_with_allocation(builder, *sequence, *block, c_ptr, dC, c_size, type);
1✔
298

299
        this->create_copy_from_device_with_deallocation(builder, *sequence, *block, c_ptr, dC, c_size, type);
1✔
300
        this->create_deallocate(builder, *sequence, *block, dA, type);
1✔
301
        this->create_deallocate(builder, *sequence, *block, dB, type);
1✔
302

303
        in_access.at("__A").data(dA);
1✔
304
        in_access.at("__B").data(dB);
1✔
305
        in_access.at("__C").data(dC);
1✔
306
    } else {
1✔
UNCOV
307
        throw InvalidSDFGException("ROCBLASDataTransferExtraction: Unsupported BLAS type");
×
308
    }
×
309

310
    // Change the implementation type to ROCMBLAS without data transfers
311
    this->blas_node_.implementation_type() = rocm::ImplementationType_ROCMWithoutTransfers;
2✔
312
}
2✔
313

314
// Serializes this transformation into `j`.
//
// Written keys:
//   "transformation_type"  - the value of name();
//   "subgraph"             - object with a single entry "0" describing the
//                            BLAS node ("element_id" and "type");
//   "blas_node_element_id" - the same element id, kept as a legacy field.
void ROCBLASDataTransferExtraction::to_json(nlohmann::json& j) const {
    const auto node_id = this->blas_node_.element_id();

    j["transformation_type"] = this->name();

    // BLAS nodes are not loops; they appear as generic elements in GNN data.
    // The type "unknown" matches the feature extractor's classification.
    nlohmann::json node_entry;
    node_entry["element_id"] = node_id;
    node_entry["type"] = "unknown";

    nlohmann::json subgraph;
    subgraph["0"] = node_entry;
    j["subgraph"] = subgraph;

    // Legacy field for backward compatibility.
    j["blas_node_element_id"] = node_id;
}
2✔
324

325
// Rebuilds the transformation from its JSON description.
//
// The element id of the BLAS node is read from j["subgraph"]["0"]["element_id"]
// when a "subgraph" key is present, otherwise from the legacy key
// "blas_node_element_id". The element is then looked up through `builder`.
//
// Throws transformations::InvalidTransformationDescriptionException if no
// element with that id exists or if the element is not a BLASNode.
ROCBLASDataTransferExtraction ROCBLASDataTransferExtraction::
    from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& j) {
    // Prefer the subgraph description; fall back to the legacy field.
    const size_t blas_node_id = j.contains("subgraph")
                                    ? j.at("subgraph").at("0").at("element_id").get<size_t>()
                                    : j.at("blas_node_element_id").get<size_t>();

    auto* element = builder.find_element_by_id(blas_node_id);
    if (element == nullptr) {
        throw transformations::
            InvalidTransformationDescriptionException("Element with ID " + std::to_string(blas_node_id) + " not found");
    }

    auto* blas_node = dynamic_cast<math::blas::BLASNode*>(element);
    if (blas_node == nullptr) {
        throw transformations::InvalidTransformationDescriptionException(
            "Element with ID " + std::to_string(blas_node_id) + " is not a BLASNode"
        );
    }

    return ROCBLASDataTransferExtraction(*blas_node);
}
2✔
348

349
} // namespace rocm
350
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc