• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 26556322966

27 May 2026 03:45PM UTC coverage: 60.869% (-0.02%) from 60.886%
26556322966

push

github

web-flow
Libnode ptr edges (#719)

Migrating SDFGs to treat pointers as inputs to libNodes / Calls as scalars.
A pointer will only appear in an output edge if it's actually returned from the function (like malloc).

* Stdlib, Blas and Tensor Matmul nodes were migrated to this new format. Other, currently transitory Tensor Nodes are not yet migrated.
* DOCC version was bumped to incorporate previous docc-llvm versions (up to 0.4.0) that had been counted separately.
! Until all passes consider the use / leak of pointers as uncertainty / hiding potential writes, TensorNodes are declared as general side-effect.
* Lots of utility functions to centralize the creation (and edges) of various libNodes that needed to be changed.
* Fixed & unified docc paths across python and llvm front-ends.
* Skip BlockFusion test that fails due to its libNodes currently having side effects
~ Prevent a crash in DotViz when using symbolic offsets into structs
* Removing old ConstProp pass, it is not safe for the new pointer representation and should not be all too critical

961 of 1749 new or added lines in 52 files covered. (54.95%)

87 existing lines in 28 files now uncovered.

35225 of 57870 relevant lines covered (60.87%)

11046.32 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.04
/opt/src/transformations/offloading/rocblas_data_transfer_extraction.cpp
1
#include "sdfg/transformations/offloading/rocblas_data_transfer_extraction.h"
2

3
#include <cassert>
4
#include <cstddef>
5
#include <nlohmann/json_fwd.hpp>
6
#include <string>
7
#include <unordered_map>
8

9
#include "sdfg/analysis/analysis.h"
10
#include "sdfg/analysis/scope_analysis.h"
11
#include "sdfg/builder/structured_sdfg_builder.h"
12
#include "sdfg/data_flow/access_node.h"
13
#include "sdfg/data_flow/library_nodes/math/blas/dot_node.h"
14
#include "sdfg/data_flow/library_nodes/math/blas/gemm_node.h"
15
#include "sdfg/data_flow/library_nodes/math/math.h"
16
#include "sdfg/element.h"
17
#include "sdfg/exceptions.h"
18
#include "sdfg/structured_control_flow/block.h"
19
#include "sdfg/structured_control_flow/sequence.h"
20
#include "sdfg/symbolic/symbolic.h"
21
#include "sdfg/targets/rocm/rocm.h"
22
#include "sdfg/targets/rocm/rocm_data_offloading_node.h"
23
#include "sdfg/transformations/transformation.h"
24
#include "sdfg/types/type.h"
25
#include "sdfg/types/utils.h"
26
#include "symengine/symengine_rcp.h"
27

28
namespace sdfg {
29
namespace rocm {
30

31
std::string ROCBLASDataTransferExtraction::create_device_container(
32
    builder::StructuredSDFGBuilder& builder, const types::Pointer& type, const symbolic::Expression& size
33
) {
5✔
34
    auto new_type = type.clone();
5✔
35
    new_type->storage_type(types::StorageType(
5✔
36
        "AMD_Generic", size, types::StorageType::AllocationType::Unmanaged, types::StorageType::AllocationType::Unmanaged
5✔
37
    ));
5✔
38
    auto device_container = builder.find_new_name(ROCM_DEVICE_PREFIX);
5✔
39
    builder.add_container(device_container, *new_type);
5✔
40
    return device_container;
5✔
41
}
5✔
42

43
void ROCBLASDataTransferExtraction::create_allocate(
44
    builder::StructuredSDFGBuilder& builder,
45
    structured_control_flow::Sequence& sequence,
46
    structured_control_flow::Block& block,
47
    const std::string& device_container,
48
    const symbolic::Expression& size,
49
    const types::Pointer& type
50
) {
×
51
    auto& alloc_block = builder.add_block_before(sequence, block, {}, block.debug_info());
×
NEW
52
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
×
NEW
53
        builder,
×
54
        alloc_block,
×
NEW
55
        device_container,
×
NEW
56
        device_container,
×
NEW
57
        offloading::DataTransferDirection::NONE,
×
NEW
58
        offloading::BufferLifecycle::ALLOC,
×
NEW
59
        type,
×
NEW
60
        type,
×
61
        this->blas_node_.debug_info(),
×
62
        size,
×
NEW
63
        symbolic::zero()
×
64
    );
×
65
}
×
66

67
void ROCBLASDataTransferExtraction::create_deallocate(
68
    builder::StructuredSDFGBuilder& builder,
69
    structured_control_flow::Sequence& sequence,
70
    structured_control_flow::Block& block,
71
    const std::string& device_container,
72
    const types::Pointer& type
73
) {
4✔
74
    auto& dealloc_block = builder.add_block_after(sequence, block, {}, block.debug_info());
4✔
75
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
4✔
76
        builder,
4✔
77
        dealloc_block,
4✔
78
        device_container,
4✔
79
        device_container,
4✔
80
        offloading::DataTransferDirection::NONE,
4✔
81
        offloading::BufferLifecycle::FREE,
4✔
82
        type,
4✔
83
        type,
4✔
84
        this->blas_node_.debug_info(),
4✔
85
        SymEngine::null,
4✔
86
        symbolic::zero()
4✔
87
    );
4✔
88
}
4✔
89

90
void ROCBLASDataTransferExtraction::create_copy_to_device(
91
    builder::StructuredSDFGBuilder& builder,
92
    structured_control_flow::Sequence& sequence,
93
    structured_control_flow::Block& block,
94
    const std::string& host_container,
95
    const std::string& device_container,
96
    const symbolic::Expression& size,
97
    const types::Pointer& type
98
) {
×
99
    auto& copy_block = builder.add_block_before(sequence, block, {}, block.debug_info());
×
NEW
100
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
×
NEW
101
        builder,
×
102
        copy_block,
×
NEW
103
        host_container,
×
NEW
104
        device_container,
×
NEW
105
        offloading::DataTransferDirection::H2D,
×
NEW
106
        offloading::BufferLifecycle::NO_CHANGE,
×
NEW
107
        type,
×
NEW
108
        type,
×
109
        this->blas_node_.debug_info(),
×
110
        size,
×
NEW
111
        symbolic::zero()
×
112
    );
×
113
}
×
114

115
void ROCBLASDataTransferExtraction::create_copy_from_device(
116
    builder::StructuredSDFGBuilder& builder,
117
    structured_control_flow::Sequence& sequence,
118
    structured_control_flow::Block& block,
119
    const std::string& host_container,
120
    const std::string& device_container,
121
    const symbolic::Expression& size,
122
    const types::Pointer& type
123
) {
×
124
    auto& copy_block = builder.add_block_after(sequence, block, {}, block.debug_info());
×
NEW
125
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
×
NEW
126
        builder,
×
127
        copy_block,
×
NEW
128
        host_container,
×
NEW
129
        device_container,
×
NEW
130
        offloading::DataTransferDirection::D2H,
×
NEW
131
        offloading::BufferLifecycle::NO_CHANGE,
×
NEW
132
        type,
×
NEW
133
        type,
×
134
        this->blas_node_.debug_info(),
×
135
        size,
×
NEW
136
        symbolic::zero()
×
137
    );
×
138
}
×
139

140
void ROCBLASDataTransferExtraction::create_copy_to_device_with_allocation(
141
    builder::StructuredSDFGBuilder& builder,
142
    structured_control_flow::Sequence& sequence,
143
    structured_control_flow::Block& block,
144
    const std::string& host_container,
145
    const std::string& device_container,
146
    const symbolic::Expression& size,
147
    const types::Pointer& type
148
) {
5✔
149
    auto& copy_block = builder.add_block_before(sequence, block, {}, block.debug_info());
5✔
150
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
5✔
151
        builder,
5✔
152
        copy_block,
5✔
153
        host_container,
5✔
154
        device_container,
5✔
155
        offloading::DataTransferDirection::H2D,
5✔
156
        offloading::BufferLifecycle::ALLOC,
5✔
157
        type,
5✔
158
        type,
5✔
159
        this->blas_node_.debug_info(),
5✔
160
        size,
5✔
161
        symbolic::zero()
5✔
162
    );
5✔
163
}
5✔
164

165
void ROCBLASDataTransferExtraction::create_copy_from_device_with_deallocation(
166
    builder::StructuredSDFGBuilder& builder,
167
    structured_control_flow::Sequence& sequence,
168
    structured_control_flow::Block& block,
169
    const std::string& host_container,
170
    const std::string& device_container,
171
    const symbolic::Expression& size,
172
    const types::Pointer& type
173
) {
1✔
174
    auto& copy_block = builder.add_block_after(sequence, block, {}, block.debug_info());
1✔
175
    offloading::add_offloading_node<ROCMDataOffloadingNode>(
1✔
176
        builder,
1✔
177
        copy_block,
1✔
178
        host_container,
1✔
179
        device_container,
1✔
180
        offloading::DataTransferDirection::D2H,
1✔
181
        offloading::BufferLifecycle::FREE,
1✔
182
        type,
1✔
183
        type,
1✔
184
        this->blas_node_.debug_info(),
1✔
185
        size,
1✔
186
        symbolic::zero()
1✔
187
    );
1✔
188
}
1✔
189

190
/// Creates the transformation for the given BLAS node; the node is bound to
/// the `blas_node_` member and is not copied.
ROCBLASDataTransferExtraction::ROCBLASDataTransferExtraction(math::blas::BLASNode& blas_node) : blas_node_(blas_node) {}
10✔
191

192
/// Returns the fixed identifier of this transformation (also written to the
/// "transformation_type" field by to_json()).
std::string ROCBLASDataTransferExtraction::name() const { return "ROCBLASDataTransferExtraction"; }
4✔
193

194
bool ROCBLASDataTransferExtraction::
195
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
6✔
196
    // BLAS node must have implementation type ROCMBLAS with data transfers
197
    if (this->blas_node_.implementation_type().value() != rocm::ImplementationType_ROCMWithTransfers.value()) {
6✔
198
        return false;
2✔
199
    }
2✔
200

201
    // Restrict to BLAS nodes in their own block
202
    auto& dfg = this->blas_node_.get_parent();
4✔
203
    if (dfg.nodes().size() != dfg.in_degree(this->blas_node_) + dfg.out_degree(this->blas_node_) + 1) {
4✔
204
        return false;
×
205
    }
×
206

207
    // Supported BLAS nodes
208
    if (dynamic_cast<math::blas::DotNode*>(&this->blas_node_)) {
4✔
209
        return true;
2✔
210
    } else if (dynamic_cast<math::blas::GEMMNode*>(&this->blas_node_)) {
2✔
211
        return true;
2✔
212
    } else {
2✔
213
        return false;
×
214
    }
×
215
}
4✔
216

217
void ROCBLASDataTransferExtraction::
218
    apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
2✔
219
    // Get data flow graph and block
220
    auto& dfg = this->blas_node_.get_parent();
2✔
221
    auto* block = dynamic_cast<structured_control_flow::Block*>(dfg.get_parent());
2✔
222
    assert(block);
2✔
223

224
    // Get sequence
225
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
2✔
226
    auto* sequence = dynamic_cast<structured_control_flow::Sequence*>(scope_analysis.parent_scope(block));
2✔
227
    assert(sequence);
2✔
228

229
    // Determine type
230
    types::PrimitiveType precision;
2✔
231
    switch (this->blas_node_.precision()) {
2✔
232
        case math::blas::h:
×
233
            precision = types::PrimitiveType::Half;
×
234
            break;
×
235
        case math::blas::s:
1✔
236
            precision = types::PrimitiveType::Float;
1✔
237
            break;
1✔
238
        case math::blas::d:
1✔
239
            precision = types::PrimitiveType::Double;
1✔
240
            break;
1✔
241
        default:
×
242
            throw InvalidSDFGException("ROCBLASDataTransferExtraction: Unsupported precision");
×
243
    }
2✔
244
    types::Scalar base_type(precision);
2✔
245
    types::Pointer type(base_type);
2✔
246

247
    // Capture in and out accesses
248
    std::unordered_map<std::string, data_flow::AccessNode&> in_access, out_access;
2✔
249
    for (auto& iedge : dfg.in_edges(this->blas_node_)) {
7✔
250
        in_access.insert({iedge.dst_conn(), static_cast<data_flow::AccessNode&>(iedge.src())});
7✔
251
    }
7✔
252
    for (auto& oedge : dfg.out_edges(this->blas_node_)) {
2✔
253
        out_access.insert({oedge.src_conn(), static_cast<data_flow::AccessNode&>(oedge.dst())});
1✔
254
    }
1✔
255

256
    if (auto* dot_node = dynamic_cast<math::blas::DotNode*>(&this->blas_node_)) {
2✔
257
        auto x_size = symbolic::mul(
1✔
258
            symbolic::add(symbolic::mul(symbolic::sub(dot_node->n(), symbolic::one()), dot_node->incx()), symbolic::one()),
1✔
259
            types::get_contiguous_element_size(type, true)
1✔
260
        );
1✔
261
        auto y_size = symbolic::mul(
1✔
262
            symbolic::add(symbolic::mul(symbolic::sub(dot_node->n(), symbolic::one()), dot_node->incy()), symbolic::one()),
1✔
263
            types::get_contiguous_element_size(type, true)
1✔
264
        );
1✔
265
        auto dx = this->create_device_container(builder, type, x_size);
1✔
266
        auto dy = this->create_device_container(builder, type, y_size);
1✔
267

268
        this->create_copy_to_device_with_allocation(
1✔
269
            builder, *sequence, *block, in_access.at("__x").data(), dx, x_size, type
1✔
270
        );
1✔
271
        this->create_copy_to_device_with_allocation(
1✔
272
            builder, *sequence, *block, in_access.at("__y").data(), dy, y_size, type
1✔
273
        );
1✔
274

275
        this->create_deallocate(builder, *sequence, *block, dx, type);
1✔
276
        this->create_deallocate(builder, *sequence, *block, dy, type);
1✔
277

278
        in_access.at("__x").data(dx);
1✔
279
        in_access.at("__y").data(dy);
1✔
280
    } else if (auto* gemm_node = dynamic_cast<math::blas::GEMMNode*>(&this->blas_node_)) {
1✔
281
        auto elem_size = types::get_contiguous_element_size(type, true);
1✔
282
        auto a_size = symbolic::mul(symbolic::mul(gemm_node->m(), gemm_node->k()), elem_size);
1✔
283
        auto b_size = symbolic::mul(symbolic::mul(gemm_node->k(), gemm_node->n()), elem_size);
1✔
284
        auto c_size = symbolic::mul(symbolic::mul(gemm_node->m(), gemm_node->n()), elem_size);
1✔
285

286
        auto dA = this->create_device_container(builder, type, a_size);
1✔
287
        auto dB = this->create_device_container(builder, type, b_size);
1✔
288
        auto dC = this->create_device_container(builder, type, c_size);
1✔
289

290
        this->create_copy_to_device_with_allocation(
1✔
291
            builder, *sequence, *block, in_access.at("__A").data(), dA, a_size, type
1✔
292
        );
1✔
293
        this->create_copy_to_device_with_allocation(
1✔
294
            builder, *sequence, *block, in_access.at("__B").data(), dB, b_size, type
1✔
295
        );
1✔
296
        auto c_ptr = in_access.at("__C").data();
1✔
297
        this->create_copy_to_device_with_allocation(builder, *sequence, *block, c_ptr, dC, c_size, type);
1✔
298

299
        this->create_copy_from_device_with_deallocation(builder, *sequence, *block, c_ptr, dC, c_size, type);
1✔
300
        this->create_deallocate(builder, *sequence, *block, dA, type);
1✔
301
        this->create_deallocate(builder, *sequence, *block, dB, type);
1✔
302

303
        in_access.at("__A").data(dA);
1✔
304
        in_access.at("__B").data(dB);
1✔
305
        in_access.at("__C").data(dC);
1✔
306
    } else {
1✔
UNCOV
307
        throw InvalidSDFGException("ROCBLASDataTransferExtraction: Unsupported BLAS type");
×
308
    }
×
309

310
    // Change the implementation type to ROCMBLAS without data transfers
311
    this->blas_node_.implementation_type() = rocm::ImplementationType_ROCMWithoutTransfers;
2✔
312
}
2✔
313

314
/// Serializes this transformation into `j`.
///
/// Writes three fields:
///  - "transformation_type": the value of name(),
///  - "subgraph": an object with a single entry "0" holding the BLAS node's
///    "element_id" and the "type" string "unknown",
///  - "blas_node_element_id": the BLAS node's element id again.
///
/// @param j JSON value that receives the fields.
void ROCBLASDataTransferExtraction::to_json(nlohmann::json& j) const {
    const auto node_id = this->blas_node_.element_id();

    j["transformation_type"] = this->name();

    // BLAS nodes are not loops; they appear as generic elements in GNN data.
    // Use type "unknown" to match the feature extractor's classification.
    nlohmann::json node_entry = nlohmann::json::object();
    node_entry["element_id"] = node_id;
    node_entry["type"] = "unknown";

    nlohmann::json subgraph = nlohmann::json::object();
    subgraph["0"] = node_entry;
    j["subgraph"] = subgraph;

    // Legacy field for backward compatibility.
    j["blas_node_element_id"] = node_id;
}
2✔
324

325
/// Rebuilds the transformation from its JSON description.
///
/// The BLAS node's element id is read from `subgraph["0"]["element_id"]` when
/// a "subgraph" field is present, and from "blas_node_element_id" otherwise.
/// The element is then looked up through the builder and must be a BLASNode.
///
/// @param builder Builder used to resolve the element id.
/// @param j       JSON description of the transformation.
/// @return A transformation bound to the resolved BLAS node.
/// @throws transformations::InvalidTransformationDescriptionException if no
///         element has that id or the element is not a BLASNode.
ROCBLASDataTransferExtraction ROCBLASDataTransferExtraction::
    from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& j) {
    // Prefer the "subgraph" layout; fall back to the legacy id field.
    const size_t blas_node_id = j.contains("subgraph") ? j.at("subgraph").at("0").at("element_id").get<size_t>()
                                                       : j.at("blas_node_element_id").get<size_t>();

    auto* element = builder.find_element_by_id(blas_node_id);
    if (element == nullptr) {
        throw transformations::
            InvalidTransformationDescriptionException("Element with ID " + std::to_string(blas_node_id) + " not found");
    }

    if (auto* blas_node = dynamic_cast<math::blas::BLASNode*>(element)) {
        return ROCBLASDataTransferExtraction(*blas_node);
    }

    throw transformations::InvalidTransformationDescriptionException(
        "Element with ID " + std::to_string(blas_node_id) + " is not a BLASNode"
    );
}
2✔
348

349
} // namespace rocm
350
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc