• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 24215882789

09 Apr 2026 10:12PM UTC coverage: 64.375% (-0.007%) from 64.382%
24215882789

Pull #668

github

web-flow
Merge 6f7f28e8f into bb3981349
Pull Request #668: Offload Memset to GPU

249 of 381 new or added lines in 18 files covered. (65.35%)

189 existing lines in 2 files now uncovered.

29942 of 46512 relevant lines covered (64.37%)

584.42 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.52
/opt/src/transformations/offloading/cuda_stdlib_data_transfer_extraction.cpp
1
#include "sdfg/transformations/offloading/cuda_stdlib_data_transfer_extraction.h"
2

3
#include <cassert>
4
#include <string>
5
#include <unordered_map>
6

7
#include "sdfg/analysis/analysis.h"
8
#include "sdfg/analysis/scope_analysis.h"
9
#include "sdfg/builder/structured_sdfg_builder.h"
10
#include "sdfg/data_flow/access_node.h"
11
#include "sdfg/data_flow/library_nodes/stdlib/memset.h"
12
#include "sdfg/exceptions.h"
13
#include "sdfg/structured_control_flow/block.h"
14
#include "sdfg/structured_control_flow/sequence.h"
15
#include "sdfg/symbolic/symbolic.h"
16
#include "sdfg/targets/cuda/cuda.h"
17
#include "sdfg/targets/cuda/cuda_data_offloading_node.h"
18
#include "sdfg/types/type.h"
19
#include "sdfg/types/utils.h"
20
#include "symengine/symengine_rcp.h"
21

22
namespace sdfg {
23
namespace cuda {
24

25
std::string CUDAStdlibDataTransferExtraction::create_device_container(
26
    builder::StructuredSDFGBuilder& builder, const types::Pointer& type, const symbolic::Expression& size
27
) {
2✔
28
    auto new_type = type.clone();
2✔
29
    new_type->storage_type(types::StorageType(
2✔
30
        "NV_Generic", size, types::StorageType::AllocationType::Unmanaged, types::StorageType::AllocationType::Unmanaged
2✔
31
    ));
2✔
32
    auto device_container = builder.find_new_name(CUDA_DEVICE_PREFIX);
2✔
33
    builder.add_container(device_container, *new_type);
2✔
34
    return device_container;
2✔
35
}
2✔
36

37
void CUDAStdlibDataTransferExtraction::create_allocate(
38
    builder::StructuredSDFGBuilder& builder,
39
    structured_control_flow::Sequence& sequence,
40
    structured_control_flow::Block& block,
41
    const std::string& device_container,
42
    const symbolic::Expression& size,
43
    const types::Pointer& type
44
) {
2✔
45
    auto& alloc_block = builder.add_block_before(sequence, block, {}, block.debug_info());
2✔
46
    auto& d_cont = builder.add_access(alloc_block, device_container);
2✔
47
    auto& alloc_node = builder.add_library_node<CUDADataOffloadingNode>(
2✔
48
        alloc_block,
2✔
49
        this->memset_node_.debug_info(),
2✔
50
        size,
2✔
51
        symbolic::zero(),
2✔
52
        offloading::DataTransferDirection::NONE,
2✔
53
        offloading::BufferLifecycle::ALLOC
2✔
54
    );
2✔
55
    builder.add_computational_memlet(alloc_block, alloc_node, "_ret", d_cont, {}, type);
2✔
56
}
2✔
57

58
void CUDAStdlibDataTransferExtraction::create_deallocate(
59
    builder::StructuredSDFGBuilder& builder,
60
    structured_control_flow::Sequence& sequence,
61
    structured_control_flow::Block& block,
62
    const std::string& device_container,
63
    const types::Pointer& type
NEW
64
) {
×
NEW
65
    auto& dealloc_block = builder.add_block_after(sequence, block, {}, block.debug_info());
×
NEW
66
    auto& d_cont_in = builder.add_access(dealloc_block, device_container);
×
NEW
67
    auto& d_cont_out = builder.add_access(dealloc_block, device_container);
×
NEW
68
    auto& dealloc_node = builder.add_library_node<CUDADataOffloadingNode>(
×
NEW
69
        dealloc_block,
×
NEW
70
        this->memset_node_.debug_info(),
×
NEW
71
        SymEngine::null,
×
NEW
72
        symbolic::zero(),
×
NEW
73
        offloading::DataTransferDirection::NONE,
×
NEW
74
        offloading::BufferLifecycle::FREE
×
NEW
75
    );
×
NEW
76
    builder.add_computational_memlet(dealloc_block, d_cont_in, dealloc_node, "_ptr", {}, type);
×
NEW
77
    builder.add_computational_memlet(dealloc_block, dealloc_node, "_ptr", d_cont_out, {}, type);
×
NEW
78
}
×
79

80
void CUDAStdlibDataTransferExtraction::create_copy_from_device_with_deallocation(
81
    builder::StructuredSDFGBuilder& builder,
82
    structured_control_flow::Sequence& sequence,
83
    structured_control_flow::Block& block,
84
    const std::string& host_container,
85
    const std::string& device_container,
86
    const symbolic::Expression& size,
87
    const types::Pointer& type
88
) {
2✔
89
    auto& copy_block = builder.add_block_after(sequence, block, {}, block.debug_info());
2✔
90
    auto& cont = builder.add_access(copy_block, host_container);
2✔
91
    auto& d_cont = builder.add_access(copy_block, device_container);
2✔
92
    auto& copy_node = builder.add_library_node<CUDADataOffloadingNode>(
2✔
93
        copy_block,
2✔
94
        this->memset_node_.debug_info(),
2✔
95
        size,
2✔
96
        symbolic::zero(),
2✔
97
        offloading::DataTransferDirection::D2H,
2✔
98
        offloading::BufferLifecycle::FREE
2✔
99
    );
2✔
100
    builder.add_computational_memlet(copy_block, d_cont, copy_node, "_src", {}, type);
2✔
101
    builder.add_computational_memlet(copy_block, copy_node, "_dst", cont, {}, type);
2✔
102
}
2✔
103

104
// Binds the transformation to the memset node it operates on. The node is
// kept in `memset_node_` and used by all other member functions.
CUDAStdlibDataTransferExtraction::CUDAStdlibDataTransferExtraction(
    ::sdfg::stdlib::MemsetNode& memset_node
)
    : memset_node_(memset_node) {
}
8✔
106

107
// Returns the identifier of this transformation; to_json() stores it under
// "transformation_type".
std::string CUDAStdlibDataTransferExtraction::name() const {
    return "CUDAStdlibDataTransferExtraction";
}
1✔
108

109
bool CUDAStdlibDataTransferExtraction::
110
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
7✔
111
    if (this->memset_node_.implementation_type().value() != cuda::ImplementationType_CUDAWithTransfers.value()) {
7✔
112
        return false;
4✔
113
    }
4✔
114

115
    // Restrict to memset nodes in their own block
116
    auto& dfg = this->memset_node_.get_parent();
3✔
117
    if (dfg.nodes().size() != dfg.in_degree(this->memset_node_) + dfg.out_degree(this->memset_node_) + 1) {
3✔
NEW
118
        return false;
×
NEW
119
    }
×
120

121
    return true;
3✔
122
}
3✔
123

124
void CUDAStdlibDataTransferExtraction::
125
    apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
2✔
126
    // Get data flow graph and block
127
    auto& dfg = this->memset_node_.get_parent();
2✔
128
    auto* block = dynamic_cast<structured_control_flow::Block*>(dfg.get_parent());
2✔
129
    assert(block);
2✔
130

131
    // Get sequence
132
    auto& scope_analysis = analysis_manager.get<analysis::ScopeAnalysis>();
2✔
133
    auto* sequence = dynamic_cast<structured_control_flow::Sequence*>(scope_analysis.parent_scope(block));
2✔
134
    assert(sequence);
2✔
135

136
    // Capture output accesses
137
    std::unordered_map<std::string, data_flow::AccessNode&> out_access;
2✔
138
    for (auto& oedge : dfg.out_edges(this->memset_node_)) {
2✔
139
        out_access.insert({oedge.src_conn(), static_cast<data_flow::AccessNode&>(oedge.dst())});
2✔
140
    }
2✔
141

142
    // Use the host container's actual type to avoid type mismatches
143
    auto& host_container_name = out_access.at("_ptr").data();
2✔
144
    auto& host_type = builder.subject().type(host_container_name);
2✔
145
    auto& type = static_cast<const types::Pointer&>(host_type);
2✔
146

147
    auto ptr_size = this->memset_node_.num();
2✔
148
    auto dPtr = this->create_device_container(builder, type, ptr_size);
2✔
149

150
    // Allocate device buffer
151
    this->create_allocate(builder, *sequence, *block, dPtr, ptr_size, type);
2✔
152

153
    // Copy from device to host and deallocate
154
    this->create_copy_from_device_with_deallocation(
2✔
155
        builder, *sequence, *block, out_access.at("_ptr").data(), dPtr, ptr_size, type
2✔
156
    );
2✔
157

158
    // Redirect output to device container
159
    out_access.at("_ptr").data(dPtr);
2✔
160

161
    // Change the implementation type to without transfers
162
    this->memset_node_.implementation_type() = cuda::ImplementationType_CUDAWithoutTransfers;
2✔
163
}
2✔
164

165
// Writes a description of this transformation into `j`:
//   "transformation_type"    - the value of name()
//   "subgraph"               - one entry "0" with the memset node's element id
//                              and the type string "unknown"
//   "memset_node_element_id" - the memset node's element id
void CUDAStdlibDataTransferExtraction::to_json(nlohmann::json& j) const {
    const auto node_id = this->memset_node_.element_id();

    j["transformation_type"] = this->name();
    j["subgraph"] = {{"0", {{"element_id", node_id}, {"type", "unknown"}}}};
    j["memset_node_element_id"] = node_id;
}
1✔
170

171
} // namespace cuda
172
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc