• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 26520678771

27 May 2026 03:22PM UTC coverage: 60.864% (-0.02%) from 60.886%
26520678771

Pull #719

github

web-flow
Merge 99c5e4f9d into 707dadcf8
Pull Request #719: Libnode ptr edges

961 of 1749 new or added lines in 52 files covered. (54.95%)

90 existing lines in 29 files now uncovered.

35222 of 57870 relevant lines covered (60.86%)

11043.61 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.04
/opt/src/transformations/offloading/cublas_data_transfer_extraction.cpp
1
#include "sdfg/transformations/offloading/cublas_data_transfer_extraction.h"
2

3
#include <cassert>
4
#include <cstddef>
5
#include <nlohmann/json_fwd.hpp>
6
#include <string>
7
#include <unordered_map>
8

9
#include "sdfg/analysis/analysis.h"
10
#include "sdfg/analysis/scope_analysis.h"
11
#include "sdfg/builder/structured_sdfg_builder.h"
12
#include "sdfg/data_flow/access_node.h"
13
#include "sdfg/data_flow/library_nodes/math/blas/dot_node.h"
14
#include "sdfg/data_flow/library_nodes/math/blas/gemm_node.h"
15
#include "sdfg/data_flow/library_nodes/math/math.h"
16
#include "sdfg/element.h"
17
#include "sdfg/exceptions.h"
18
#include "sdfg/structured_control_flow/block.h"
19
#include "sdfg/structured_control_flow/sequence.h"
20
#include "sdfg/symbolic/symbolic.h"
21
#include "sdfg/targets/cuda/cuda.h"
22
#include "sdfg/targets/cuda/cuda_data_offloading_node.h"
23
#include "sdfg/transformations/transformation.h"
24
#include "sdfg/types/type.h"
25
#include "sdfg/types/utils.h"
26
#include "symengine/symengine_rcp.h"
27

28
namespace sdfg {
29
namespace cuda {
30

31
std::string CUBLASDataTransferExtraction::create_device_container(
32
    builder::StructuredSDFGBuilder& builder, const types::Pointer& type, const symbolic::Expression& size
33
) {
5✔
34
    auto new_type = type.clone();
5✔
35
    new_type->storage_type(types::StorageType(
5✔
36
        "NV_Generic", size, types::StorageType::AllocationType::Unmanaged, types::StorageType::AllocationType::Unmanaged
5✔
37
    ));
5✔
38
    auto device_container = builder.find_new_name(CUDA_DEVICE_PREFIX);
5✔
39
    builder.add_container(device_container, *new_type);
5✔
40
    return device_container;
5✔
41
}
5✔
42

43
void CUBLASDataTransferExtraction::create_allocate(
44
    builder::StructuredSDFGBuilder& builder,
45
    structured_control_flow::Sequence& sequence,
46
    structured_control_flow::Block& block,
47
    const std::string& device_container,
48
    const symbolic::Expression& size,
49
    const types::Pointer& type
50
) {
×
51
    auto& alloc_block = builder.add_block_before(sequence, block, {}, block.debug_info());
×
NEW
52
    offloading::add_offloading_node<CUDADataOffloadingNode>(
×
NEW
53
        builder,
×
54
        alloc_block,
×
NEW
55
        device_container,
×
NEW
56
        device_container,
×
NEW
57
        offloading::DataTransferDirection::NONE,
×
NEW
58
        offloading::BufferLifecycle::ALLOC,
×
NEW
59
        type,
×
NEW
60
        type,
×
61
        this->blas_node_.debug_info(),
×
62
        size,
×
NEW
63
        symbolic::zero()
×
64
    );
×
65
}
×
66

67
void CUBLASDataTransferExtraction::create_deallocate(
68
    builder::StructuredSDFGBuilder& builder,
69
    structured_control_flow::Sequence& sequence,
70
    structured_control_flow::Block& block,
71
    const std::string& device_container,
72
    const types::Pointer& type
73
) {
4✔
74
    auto& dealloc_block = builder.add_block_after(sequence, block, {}, block.debug_info());
4✔
75
    offloading::add_offloading_node<CUDADataOffloadingNode>(
4✔
76
        builder,
4✔
77
        dealloc_block,
4✔
78
        device_container,
4✔
79
        device_container,
4✔
80
        offloading::DataTransferDirection::NONE,
4✔
81
        offloading::BufferLifecycle::FREE,
4✔
82
        type,
4✔
83
        type,
4✔
84
        this->blas_node_.debug_info(),
4✔
85
        SymEngine::null,
4✔
86
        symbolic::zero()
4✔
87
    );
4✔
88
}
4✔
89

90
void CUBLASDataTransferExtraction::create_copy_to_device(
91
    builder::StructuredSDFGBuilder& builder,
92
    structured_control_flow::Sequence& sequence,
93
    structured_control_flow::Block& block,
94
    const std::string& host_container,
95
    const std::string& device_container,
96
    const symbolic::Expression& size,
97
    const types::Pointer& type
98
) {
×
99
    auto& copy_block = builder.add_block_before(sequence, block, {}, block.debug_info());
×
NEW
100
    offloading::add_offloading_node<CUDADataOffloadingNode>(
×
NEW
101
        builder,
×
102
        copy_block,
×
NEW
103
        host_container,
×
NEW
104
        device_container,
×
NEW
105
        offloading::DataTransferDirection::H2D,
×
NEW
106
        offloading::BufferLifecycle::NO_CHANGE,
×
NEW
107
        type,
×
NEW
108
        type,
×
109
        this->blas_node_.debug_info(),
×
110
        size,
×
NEW
111
        symbolic::zero()
×
112
    );
×
113
}
×
114

115
void CUBLASDataTransferExtraction::create_copy_from_device(
116
    builder::StructuredSDFGBuilder& builder,
117
    structured_control_flow::Sequence& sequence,
118
    structured_control_flow::Block& block,
119
    const std::string& host_container,
120
    const std::string& device_container,
121
    const symbolic::Expression& size,
122
    const types::Pointer& type
123
) {
×
124
    auto& copy_block = builder.add_block_after(sequence, block, {}, block.debug_info());
×
NEW
125
    offloading::add_offloading_node<CUDADataOffloadingNode>(
×
NEW
126
        builder,
×
127
        copy_block,
×
NEW
128
        host_container,
×
NEW
129
        device_container,
×
NEW
130
        offloading::DataTransferDirection::D2H,
×
NEW
131
        offloading::BufferLifecycle::NO_CHANGE,
×
NEW
132
        type,
×
NEW
133
        type,
×
134
        this->blas_node_.debug_info(),
×
135
        size,
×
NEW
136
        symbolic::zero()
×
137
    );
×
138
}
×
139

140
void CUBLASDataTransferExtraction::create_copy_to_device_with_allocation(
141
    builder::StructuredSDFGBuilder& builder,
142
    structured_control_flow::Sequence& sequence,
143
    structured_control_flow::Block& block,
144
    const std::string& host_container,
145
    const std::string& device_container,
146
    const symbolic::Expression& size,
147
    const types::Pointer& type
148
) {
5✔
149
    auto& copy_block = builder.add_block_before(sequence, block, {}, block.debug_info());
5✔
150
    offloading::add_offloading_node<CUDADataOffloadingNode>(
5✔
151
        builder,
5✔
152
        copy_block,
5✔
153
        host_container,
5✔
154
        device_container,
5✔
155
        offloading::DataTransferDirection::H2D,
5✔
156
        offloading::BufferLifecycle::ALLOC,
5✔
157
        type,
5✔
158
        type,
5✔
159
        this->blas_node_.debug_info(),
5✔
160
        size,
5✔
161
        symbolic::zero()
5✔
162
    );
5✔
163
}
5✔
164

165
void CUBLASDataTransferExtraction::create_copy_from_device_with_deallocation(
166
    builder::StructuredSDFGBuilder& builder,
167
    structured_control_flow::Sequence& sequence,
168
    structured_control_flow::Block& block,
169
    const std::string& host_container,
170
    const std::string& device_container,
171
    const symbolic::Expression& size,
172
    const types::Pointer& type
173
) {
1✔
174
    auto& copy_block = builder.add_block_after(sequence, block, {}, block.debug_info());
1✔
175
    offloading::add_offloading_node<CUDADataOffloadingNode>(
1✔
176
        builder,
1✔
177
        copy_block,
1✔
178
        host_container,
1✔
179
        device_container,
1✔
180
        offloading::DataTransferDirection::D2H,
1✔
181
        offloading::BufferLifecycle::FREE,
1✔
182
        type,
1✔
183
        type,
1✔
184
        this->blas_node_.debug_info(),
1✔
185
        size,
1✔
186
        symbolic::zero()
1✔
187
    );
1✔
188
}
1✔
189

190
// Initializes blas_node_ from the BLAS node this transformation targets.
CUBLASDataTransferExtraction::CUBLASDataTransferExtraction(math::blas::BLASNode& blas_node)
    : blas_node_(blas_node) {
}
10✔
191

192
// Returns the transformation's identifier; to_json() stores it under
// "transformation_type".
std::string CUBLASDataTransferExtraction::name() const {
    return std::string("CUBLASDataTransferExtraction");
}
4✔
193

194
bool CUBLASDataTransferExtraction::
195
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
6✔
196
    // BLAS node must have implementation type CUBLAS without data transfers
197
    if (this->blas_node_.implementation_type().value() != cuda::ImplementationType_CUDAWithTransfers.value()) {
6✔
198
        return false;
2✔
199
    }
2✔
200

201

202
    // Restrict to BLAS nodes in their own block
203
    auto& dfg = this->blas_node_.get_parent();
4✔
204
    if (dfg.nodes().size() != dfg.in_degree(this->blas_node_) + dfg.out_degree(this->blas_node_) + 1) {
4✔
205
        return false;
×
206
    }
×
207

208
    // Supported BLAS nodes
209
    if (dynamic_cast<math::blas::DotNode*>(&this->blas_node_)) {
4✔
210
        return true;
2✔
211
    } else if (dynamic_cast<math::blas::GEMMNode*>(&this->blas_node_)) {
2✔
212
        return true;
2✔
213
    } else {
2✔
214
        return false;
×
215
    }
×
216
}
4✔
217

218
void CUBLASDataTransferExtraction::
219
    apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
2✔
220
    // Get data flow graph and block
221
    auto& dfg = this->blas_node_.get_parent();
2✔
222
    auto* block = dynamic_cast<structured_control_flow::Block*>(dfg.get_parent());
2✔
223
    assert(block);
2✔
224

225
    // Get sequence
226
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
2✔
227
    auto* sequence = dynamic_cast<structured_control_flow::Sequence*>(scope_analysis.parent_scope(block));
2✔
228
    assert(sequence);
2✔
229

230
    // Determine type
231
    types::PrimitiveType precision;
2✔
232
    switch (this->blas_node_.precision()) {
2✔
233
        case math::blas::h:
×
234
            precision = types::PrimitiveType::Half;
×
235
            break;
×
236
        case math::blas::s:
1✔
237
            precision = types::PrimitiveType::Float;
1✔
238
            break;
1✔
239
        case math::blas::d:
1✔
240
            precision = types::PrimitiveType::Double;
1✔
241
            break;
1✔
242
        default:
×
243
            throw InvalidSDFGException("CUBLASDataTransferExtraction: Unsupported precision");
×
244
    }
2✔
245
    types::Scalar base_type(precision);
2✔
246
    types::Pointer type(base_type);
2✔
247

248
    // Capture in and out accesses
249
    std::unordered_map<std::string, data_flow::AccessNode&> in_access, out_access;
2✔
250
    for (auto& iedge : dfg.in_edges(this->blas_node_)) {
7✔
251
        in_access.insert({iedge.dst_conn(), static_cast<data_flow::AccessNode&>(iedge.src())});
7✔
252
    }
7✔
253
    for (auto& oedge : dfg.out_edges(this->blas_node_)) {
2✔
254
        out_access.insert({oedge.src_conn(), static_cast<data_flow::AccessNode&>(oedge.dst())});
1✔
255
    }
1✔
256

257
    if (auto* dot_node = dynamic_cast<math::blas::DotNode*>(&this->blas_node_)) {
2✔
258
        auto x_size = symbolic::mul(
1✔
259
            symbolic::add(symbolic::mul(symbolic::sub(dot_node->n(), symbolic::one()), dot_node->incx()), symbolic::one()),
1✔
260
            types::get_contiguous_element_size(type, true)
1✔
261
        );
1✔
262
        auto y_size = symbolic::mul(
1✔
263
            symbolic::add(symbolic::mul(symbolic::sub(dot_node->n(), symbolic::one()), dot_node->incy()), symbolic::one()),
1✔
264
            types::get_contiguous_element_size(type, true)
1✔
265
        );
1✔
266
        auto dx = this->create_device_container(builder, type, x_size);
1✔
267
        auto dy = this->create_device_container(builder, type, y_size);
1✔
268

269
        this->create_copy_to_device_with_allocation(
1✔
270
            builder, *sequence, *block, in_access.at("__x").data(), dx, x_size, type
1✔
271
        );
1✔
272
        this->create_copy_to_device_with_allocation(
1✔
273
            builder, *sequence, *block, in_access.at("__y").data(), dy, y_size, type
1✔
274
        );
1✔
275

276
        this->create_deallocate(builder, *sequence, *block, dx, type);
1✔
277
        this->create_deallocate(builder, *sequence, *block, dy, type);
1✔
278

279
        in_access.at("__x").data(dx);
1✔
280
        in_access.at("__y").data(dy);
1✔
281
    } else if (auto* gemm_node = dynamic_cast<math::blas::GEMMNode*>(&this->blas_node_)) {
1✔
282
        auto elem_size = types::get_contiguous_element_size(type, true);
1✔
283
        auto a_size = symbolic::mul(symbolic::mul(gemm_node->m(), gemm_node->k()), elem_size);
1✔
284
        auto b_size = symbolic::mul(symbolic::mul(gemm_node->k(), gemm_node->n()), elem_size);
1✔
285
        auto c_size = symbolic::mul(symbolic::mul(gemm_node->m(), gemm_node->n()), elem_size);
1✔
286

287
        auto dA = this->create_device_container(builder, type, a_size);
1✔
288
        auto dB = this->create_device_container(builder, type, b_size);
1✔
289
        auto dC = this->create_device_container(builder, type, c_size);
1✔
290

291
        this->create_copy_to_device_with_allocation(
1✔
292
            builder, *sequence, *block, in_access.at("__A").data(), dA, a_size, type
1✔
293
        );
1✔
294
        this->create_copy_to_device_with_allocation(
1✔
295
            builder, *sequence, *block, in_access.at("__B").data(), dB, b_size, type
1✔
296
        );
1✔
297
        auto c_container = in_access.at("__C").data();
1✔
298
        this->create_copy_to_device_with_allocation(builder, *sequence, *block, c_container, dC, c_size, type);
1✔
299

300
        this->create_copy_from_device_with_deallocation(builder, *sequence, *block, c_container, dC, c_size, type);
1✔
301
        this->create_deallocate(builder, *sequence, *block, dA, type);
1✔
302
        this->create_deallocate(builder, *sequence, *block, dB, type);
1✔
303

304
        in_access.at("__A").data(dA);
1✔
305
        in_access.at("__B").data(dB);
1✔
306
        in_access.at("__C").data(dC);
1✔
307
    } else {
1✔
UNCOV
308
        throw InvalidSDFGException("CUBLASDataTransferExtraction: Unsupported BLAS type");
×
309
    }
×
310

311
    // Change the implementation type to CUBLAS without data transfers
312
    this->blas_node_.implementation_type() = cuda::ImplementationType_CUDAWithoutTransfers;
2✔
313
}
2✔
314

315
// Serializes this transformation into `j`.
//
// Writes three keys: "transformation_type" (the result of name()), "subgraph"
// (an object with a single entry "0" holding the BLAS node's element id and
// the type string "unknown"), and "blas_node_element_id".
void CUBLASDataTransferExtraction::to_json(nlohmann::json& j) const {
    const auto blas_id = this->blas_node_.element_id();

    j["transformation_type"] = this->name();

    // BLAS nodes are not loops, so they show up as generic elements in GNN
    // data; "unknown" matches the feature extractor's classification.
    nlohmann::json node_entry = {{"element_id", blas_id}, {"type", "unknown"}};
    j["subgraph"] = {{"0", node_entry}};

    // Kept for backward compatibility; from_json() falls back to this key
    // when "subgraph" is absent.
    j["blas_node_element_id"] = blas_id;
}
2✔
325

326
// Rebuilds a transformation from its JSON description.
//
// The element id is read from j["subgraph"]["0"]["element_id"] when a
// "subgraph" key is present, otherwise from the legacy key
// "blas_node_element_id". The id is resolved through
// builder.find_element_by_id().
//
// Throws transformations::InvalidTransformationDescriptionException if no
// element has that id or the element is not a math::blas::BLASNode.
CUBLASDataTransferExtraction CUBLASDataTransferExtraction::
    from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& j) {
    const size_t blas_node_id = j.contains("subgraph")
                                    ? j.at("subgraph").at("0").at("element_id").get<size_t>()
                                    : j.at("blas_node_element_id").get<size_t>();

    auto* element = builder.find_element_by_id(blas_node_id);
    if (!element) {
        throw transformations::
            InvalidTransformationDescriptionException("Element with ID " + std::to_string(blas_node_id) + " not found");
    }

    if (auto* blas_node = dynamic_cast<math::blas::BLASNode*>(element)) {
        return CUBLASDataTransferExtraction(*blas_node);
    }

    throw transformations::InvalidTransformationDescriptionException(
        "Element with ID " + std::to_string(blas_node_id) + " is not a BLASNode"
    );
}
2✔
349

350
} // namespace cuda
351
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc