• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 24348757857

13 Apr 2026 02:25PM UTC coverage: 64.469% (+0.09%) from 64.382%
24348757857

push

github

web-flow
Merge pull request #676 from daisytuner/ellide-host-mem-mgmt

Ellides H2D copies in case the host data was freshly allocated and not yet initialized before the offload transfer. In that case, offloaded Malloc is enough.

(will leave the host malloc itself in the graph, as that is a task for DDE to remove)

104 of 125 new or added lines in 5 files covered. (83.2%)

1 existing line in 1 file now uncovered.

30553 of 47392 relevant lines covered (64.47%)

584.02 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.72
/opt/src/analysis/data_transfer_elimination_analysis.cpp
1
#include "sdfg/analysis/data_transfer_elimination_analysis.h"
2

3

4
#include "sdfg/targets/cuda/cuda.h"
5
#include "sdfg/targets/offloading/data_offloading_node.h"
6

7
namespace sdfg::analysis {
8

9
void OffloadHolder::remove_host_side() {
1✔
10
    host_data = nullptr;
1✔
11
    host_access = nullptr;
1✔
12
}
1✔
13

14
OffloadState::OffloadState(DataTransferEliminationCandidateCollector& collector) : collector_(collector) {}
13✔
15

16
void OffloadState::found_escape(const std::string& container) { kills_containers_.insert(container); }
×
17

18
void OffloadState::found_ptr_write(const std::string& container, const data_flow::Memlet* memlet) {
1✔
19
    kills_containers_.insert(container);
1✔
20
}
1✔
21

22
void OffloadState::found_ptr_read(const std::string& container, const data_flow::Memlet* memlet) {
1✔
23
    // todo check with generated set if its ever possible for it to contain the transfer and a use
24
    reads_[container].push_back(memlet);
1✔
25
}
1✔
26

27
void OffloadState::found_full_barrier(ControlFlowNode& node) {
×
28
    generated_.clear(); // all generateds are from before and now wiped
×
29
    full_kill_ = true;
×
30
}
×
31

32
/**
33
 * @return { type, killing node }
34
 */
35
std::pair<OffloadState::KillingType, OffloadHolder*> OffloadState::find_killing_entry_node(const ExposedOffload&
36
                                                                                               in_flight) const {
3✔
37
    auto& holder = *in_flight.offload;
3✔
38
    auto& host_access_type = holder.host_access->base_type();
3✔
39

40
    auto* offload_node = holder.offload_node;
3✔
41
    auto* malloc_node = holder.malloc_node;
3✔
42

43
    for (const auto& entry_node : h2d_nodes_ | std::views::values) {
3✔
44
        auto& entry_holder = *entry_node;
3✔
45
        if (entry_holder.host_data->data() == holder.host_data->data()) {
3✔
46
            if (offload_node) {
3✔
47
                bool is_elim_candidate = holder.offload_node->redundant_with(*entry_holder.offload_node);
2✔
48
                return {is_elim_candidate ? KillingType::DeviceReuse : KillingType::Basic, &entry_holder};
2✔
49
            } else if (malloc_node) { // mallocs should only be in flight as long as they are untouched
2✔
50
                bool can_be_removed = entry_holder.offload_node->is_alloc() && entry_holder.offload_node->is_h2d();
1✔
51
                return {can_be_removed ? KillingType::EmptyHostMalloc : KillingType::Basic, &entry_holder};
1✔
52
            }
1✔
53
        } else if (host_access_type == entry_holder.host_access->base_type()) { // aliases
3✔
54
            // return &entry_node; // TODO left unhandled for now, because then most situations like a matmul could
55
            // never be eliminated
56
        }
×
57
    }
3✔
58

NEW
59
    return {KillingType::None, nullptr};
×
60
}
3✔
61

62
void OffloadState::found_malloc(Block& block, stdlib::MallocNode& malloc) {
1✔
63
    auto& dflow = block.dataflow();
1✔
64

65
    auto out_edges = dflow.out_edges_for_connector(malloc, malloc.output(0));
1✔
66
    if (out_edges.size() != 1) {
1✔
NEW
67
        throw std::runtime_error(
×
NEW
68
            "Unsupported: malloc node " + std::to_string(malloc.element_id()) + " with other than 1 output"
×
NEW
69
        );
×
NEW
70
    }
×
71
    auto* memlet = out_edges.at(0);
1✔
72
    auto* access_node = dynamic_cast<const data_flow::AccessNode*>(&memlet->dst());
1✔
73
    generated_.emplace(malloc.element_id(), std::make_unique<OffloadHolder>(&malloc, access_node, memlet));
1✔
74
}
1✔
75

76
void OffloadState::found_offload_node(Block& block, offloading::DataOffloadingNode& offload) {
7✔
77
    auto& dflow = block.dataflow();
7✔
78

79
    bool src_is_dev = false;
7✔
80
    bool src_is_host = false;
7✔
81
    bool dst_is_dev = false;
7✔
82
    bool dst_is_host = false;
7✔
83

84
    if (is_D2H(offload.transfer_direction())) {
7✔
85
        src_is_dev = true;
4✔
86
        dst_is_host = true;
4✔
87
    } else if (is_H2D(offload.transfer_direction())) {
4✔
88
        src_is_host = true;
3✔
89
        dst_is_dev = true;
3✔
90
    }
3✔
91

92
    const data_flow::AccessNode* found_dev_access = nullptr;
7✔
93
    const data_flow::AccessNode* found_host_access = nullptr;
7✔
94
    const data_flow::Memlet* found_host_memlet = nullptr;
7✔
95

96
    for (auto& conn : offload.inputs()) {
7✔
97
        auto* memlet = dflow.in_edge_for_connector(offload, conn);
7✔
98
        auto* access_node = dynamic_cast<const data_flow::AccessNode*>(&memlet->src());
7✔
99

100
        if (src_is_host) {
7✔
101
            found_host_access = access_node;
3✔
102
            found_host_memlet = memlet;
3✔
103
        }
3✔
104
        if (src_is_dev) {
7✔
105
            found_dev_access = access_node;
4✔
106
        }
4✔
107
    }
7✔
108

109
    for (auto& conn : offload.outputs()) {
7✔
110
        auto edges = dflow.out_edges_for_connector(offload, conn);
7✔
111
        if (edges.size() > 1) {
7✔
112
            throw std::runtime_error(
×
113
                "Unsupported: offload node " + std::to_string(offload.element_id()) +
×
114
                " with multiple outputs edges on " + conn
×
115
            );
×
116
        }
×
117
        auto* memlet = edges.at(0);
7✔
118
        auto* access_node = dynamic_cast<const data_flow::AccessNode*>(&memlet->dst());
7✔
119

120
        if (dst_is_host) {
7✔
121
            found_host_access = access_node;
4✔
122
            found_host_memlet = memlet;
4✔
123
        }
4✔
124
        if (dst_is_dev) {
7✔
125
            found_dev_access = access_node;
3✔
126
        }
3✔
127
    }
7✔
128

129
    if (found_host_access && found_dev_access) {
7✔
130
        if (dst_is_host) {
7✔
131
            generated_.emplace(
4✔
132
                offload.element_id(),
4✔
133
                std::make_unique<OffloadHolder>(&offload, found_host_access, found_host_memlet, found_dev_access)
4✔
134
            );
4✔
135
        } else {
4✔
136
            add_h2d_entry(OffloadHolder{&offload, found_host_access, found_host_memlet, found_dev_access});
3✔
137
        }
3✔
138
    }
7✔
139
}
7✔
140

141
void OffloadState::add_h2d_entry(const OffloadHolder& entry) {
3✔
142
    h2d_nodes_.emplace(entry.offload_node->element_id(), std::make_unique<OffloadHolder>(entry));
3✔
143
    // todo also need to remove generated ones killed by this. But right now
144
}
3✔
145

146
void OffloadState::apply_kills_and_changes(ExposedType& exposed) const {
9✔
147
    if (full_kill_) {
9✔
148
        exposed.clear();
×
149
        return;
×
150
    }
×
151
    for (auto it = exposed.begin(); it != exposed.end();) {
12✔
152
        auto& [id, exposedOffload] = *it;
3✔
153
        auto& holder = *exposedOffload.offload;
3✔
154

155
        auto* host = exposedOffload.offload->host_data;
3✔
156
        auto& host_container = host->data();
3✔
157

158
        auto host_reads = reads_.find(host_container);
3✔
159
        if (host_reads != reads_.end() && host_reads->second.size() > 0) {
3✔
NEW
160
            if (holder.offload_node) {
×
NEW
161
                ++exposedOffload.read_count;
×
NEW
162
            } else if (holder.malloc_node) { // mallocs are just killed on first
×
NEW
163
                DEBUG_PRINTLN(
×
NEW
164
                    "In-flight malloc area of #" << holder.malloc_node->element_id()
×
NEW
165
                                                 << " is read without being initialized!"
×
NEW
166
                );
×
NEW
167
                it = exposed.erase(it);
×
NEW
168
                continue;
×
NEW
169
            }
×
UNCOV
170
        }
×
171

172
        if (kills_containers_.contains(host_container)) {
3✔
173
            it = exposed.erase(it);
×
174
            continue;
×
175
        }
×
176

177
        auto [kill_type, killing_entry] = find_killing_entry_node(exposedOffload);
3✔
178
        if (kill_type != KillingType::None) {
3✔
179
            if (kill_type == KillingType::DeviceReuse) {
3✔
180
                collector_.found_transfer_reuse_pair(exposedOffload, *killing_entry);
2✔
181
            } else if (kill_type == KillingType::EmptyHostMalloc) {
2✔
182
                collector_.found_empty_host_malloc(exposedOffload, *killing_entry);
1✔
183
            }
1✔
184
            it = exposed.erase(it);
3✔
185
            continue;
3✔
186
        }
3✔
187
        ++it;
×
188
    }
×
189

190
    for (auto& [id, gen] : generated_) {
9✔
191
        exposed.insert({id, ExposedOffload{gen.get(), 0}});
5✔
192
    }
5✔
193
}
9✔
194

195

196
void DataTransferEliminationAnalysis::handle_lib_node(Block& block, data_flow::LibraryNode& node) {
8✔
197
    BaseUserVisitor::handle_lib_node(block, node);
8✔
198

199
    if (auto* offload_node = dynamic_cast<offloading::DataOffloadingNode*>(&node)) {
8✔
200
        get_or_create_state(block).found_offload_node(block, *offload_node);
7✔
201
    } else if (auto* malloc_node = dynamic_cast<stdlib::MallocNode*>(&node)) {
7✔
202
        get_or_create_state(block).found_malloc(block, *malloc_node);
1✔
203
    }
1✔
204
}
8✔
205

206
void DataTransferEliminationAnalysis::handle_structured_loop_before_body(StructuredLoop& loop) {
×
207
    BaseUserVisitor::handle_structured_loop_before_body(loop);
×
208

209
    // auto* map = dynamic_cast<sdfg::structured_control_flow::Map*>(&loop);
210

211
    // if (map && map->schedule_type().category() == ScheduleTypeCategory::Offloader) {
212
    //     get_or_create_state(loop).found_offloaded_kernel(*map);
213
    // }
214
}
×
215

216
void DataTransferEliminationAnalysis::
217
    on_escape(const std::string& container, const ControlFlowNode* node, const Element* user) {
7✔
218
    if (dynamic_cast<const Block*>(node)) {
7✔
219
        if (auto* memlet = dynamic_cast<const data_flow::Memlet*>(user)) {
7✔
220
            if (auto* offload = dynamic_cast<const offloading::DataOffloadingNode*>(&memlet->dst())) {
7✔
221
                // accesses of offloading nodes are handled more intelligently in found_offload_node, so ignore them
222
                // here
223
                return;
7✔
224
            }
7✔
225
        }
7✔
226
    }
7✔
227
    get_or_create_state(*node).found_escape(container);
×
228
}
×
229

230
void DataTransferEliminationAnalysis::
231
    on_read_via(const std::string& container, const ControlFlowNode* node, const data_flow::Memlet* user) {
1✔
232
    if (!dynamic_cast<const offloading::DataOffloadingNode*>(&user->dst())) {
1✔
233
        get_or_create_state(*node).found_ptr_read(container, user);
1✔
234
    }
1✔
235
}
1✔
236

237
void DataTransferEliminationAnalysis::
238
    on_write_via(const std::string& container, const ControlFlowNode* node, const data_flow::Memlet* user) {
1✔
239
    if (!dynamic_cast<const offloading::DataOffloadingNode*>(&user->dst())) {
1✔
240
        get_or_create_state(*node).found_ptr_write(container, user);
1✔
241
    }
1✔
242
}
1✔
243

244
std::unique_ptr<OffloadState> DataTransferEliminationAnalysis::
245
    create_initial_state(const structured_control_flow::ControlFlowNode& node) {
13✔
246
    return std::make_unique<OffloadState>(*this);
13✔
247
}
13✔
248

249
void DataTransferEliminationAnalysis::run() {
4✔
250
    dispatch(sdfg_.root());
4✔
251

252
    run_forward(sdfg_.root());
4✔
253
}
4✔
254

255
} // namespace sdfg::analysis
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc