• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 24474374464

15 Apr 2026 07:36PM UTC coverage: 64.633%. First build
24474374464

push

github

web-flow
Merge pull request #682 from daisytuner/read-only-offloads-reuse

Read only offloads reuse

125 of 138 new or added lines in 5 files covered. (90.58%)

30803 of 47658 relevant lines covered (64.63%)

582.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.81
/opt/src/analysis/data_transfer_elimination_analysis.cpp
1
#include "sdfg/analysis/data_transfer_elimination_analysis.h"
2

3

4
#include "sdfg/targets/cuda/cuda.h"
5
#include "sdfg/targets/offloading/data_offloading_node.h"
6

7
namespace sdfg::analysis {
8

9
void OffloadHolder::remove_host_side() {
3✔
10
    host_data = nullptr;
3✔
11
    host_access = nullptr;
3✔
12
}
3✔
13

14
// Binds this per-node state to the collector that receives elimination candidates.
OffloadState::OffloadState(DataTransferEliminationCandidateCollector& collector) : collector_(collector) {}
15

16
// A pointer to `container` escaped: any in-flight offload of it must be killed.
void OffloadState::found_escape(const std::string& container) {
    kills_containers_.insert(container);
}
17

18
// A write through `container` invalidates any in-flight offload of it.
// The kill is container-granular, so the memlet is currently unused here.
void OffloadState::found_ptr_write(const std::string& container, const data_flow::Memlet* memlet) {
    kills_containers_.insert(container);
}
21

22
// Records a read through `container`, so apply_kills_and_changes() can count
// uses of live offloads and discard uninitialized malloc areas on first use.
void OffloadState::found_ptr_read(const std::string& container, const data_flow::Memlet* memlet) {
    // todo check with generated set if its ever possible for it to contain the transfer and a use
    auto& memlets = reads_[container];
    memlets.push_back(memlet);
}
26

27
// A full barrier: everything generated so far predates it and is wiped, and
// the full-kill flag discards all incoming exposed offloads as well.
void OffloadState::found_full_barrier(ControlFlowNode& node) {
    full_kill_ = true;
    generated_.clear();
}
31

32
/**
 * Searches this state's recorded offload nodes for one that "kills" the given
 * in-flight (exposed) offload, i.e. one acting on the same buffer.
 *
 * Matching goes in two directions:
 *  - For a D2H / host-updating / malloc in flight, the recorded H2D nodes are
 *    scanned by host pointer; a compatible match is an elimination candidate
 *    (DeviceReuse for D2H->H2D, EmptyHostMalloc for Malloc->H2D).
 *  - For a device-lifetime-starting offload in flight, the generated set is
 *    scanned by device pointer (expected pattern: device alloc -> device free).
 *
 * @return { type, killing node }
 */
std::pair<OffloadState::KillingType, OffloadHolder*> OffloadState::find_killing_entry_node(const ExposedOffload&
                                                                                               in_flight) const {
    auto& holder = *in_flight.offload;
    static const types::Scalar void_type(types::Void);
    // Fall back to Void when the holder has no host access (e.g. pure device alloc/free).
    auto& host_access_type = holder.host_access ? holder.host_access->base_type() : void_type;

    auto* offload_node = holder.offload_node;
    auto* malloc_node = holder.malloc_node;

    if (holder.ends_dev_lifetime || holder.updates_on_host || malloc_node) {
        for (const auto& entry : h2d_nodes_ | std::views::values) {
            auto& entry_holder = *entry;
            auto& entry_host_access_type = entry_holder.host_access ? entry_holder.host_access->base_type() : void_type;
            bool host_ptr_matches = entry_holder.host_data && in_flight.container == entry_holder.host_data->data();

            if (host_ptr_matches) {
                if (offload_node && (holder.updates_on_host || holder.ends_dev_lifetime)) {
                    // D2H -> H2D
                    bool is_elim_candidate = holder.offload_node->is_compatible_with(*entry_holder.offload_node) &&
                                             entry_holder.updates_on_dev;
                    return {is_elim_candidate ? KillingType::DeviceReuse : KillingType::Basic, &entry_holder};
                } else if (malloc_node) {
                    // Malloc -> H2D
                    // mallocs should only be in flight as long as they are untouched
                    bool can_be_removed = entry_holder.offload_node->is_alloc() && entry_holder.offload_node->is_h2d();
                    return {can_be_removed ? KillingType::EmptyHostMalloc : KillingType::Basic, &entry_holder};
                } else {
                    // Same host pointer but no recognizable pattern: plain kill.
                    return {KillingType::Basic, &entry_holder};
                }
            } else if (host_access_type == entry_host_access_type) { // aliases
                // any -> any with aliasing types
                // return &entry_node; // TODO left unhandled for now, because then most situations like a matmul could
                // never be eliminated
            }
        }
    } else if (holder.starts_dev_lifetime) {
        for (const auto& entry : generated_ | std::views::values) {
            auto& entry_holder = *entry;

            bool dev_ptr_matches = false;
            if (holder.dev_data && entry_holder.dev_data) {
                dev_ptr_matches = in_flight.container == entry_holder.dev_data->data();
            }

            if (dev_ptr_matches) {
                // D_ALLOC -> D_FREE is the expected case, but kill for any match
                bool is_elim_candidate = holder.offload_node->is_compatible_with(*entry_holder.offload_node) &&
                                         entry_holder.ends_dev_lifetime && !entry_holder.updates_on_host;
                return {is_elim_candidate ? KillingType::DeviceFree : KillingType::Basic, &entry_holder};
            }
        }
    }

    // No recorded node acts on the same buffer: the in-flight offload survives.
    return {KillingType::None, nullptr};
}
90

91
/**
 * Registers a host malloc node observed in `block`. The freshly allocated
 * area is tracked in the generated set so a later transfer of the same buffer
 * can be recognized (e.g. as an empty-host-malloc elimination candidate).
 *
 * @throws std::runtime_error if the malloc's output shape is unsupported.
 */
void OffloadState::found_malloc(Block& block, stdlib::MallocNode& malloc) {
    auto& dflow = block.dataflow();

    auto out_edges = dflow.out_edges_for_connector(malloc, malloc.output(0));
    if (out_edges.size() != 1) {
        throw std::runtime_error(
            "Unsupported: malloc node " + std::to_string(malloc.element_id()) + " with other than 1 output"
        );
    }
    auto* memlet = out_edges.at(0);
    auto* access_node = dynamic_cast<const data_flow::AccessNode*>(&memlet->dst());
    // A null access node would only crash much later, when the holder's host
    // data is dereferenced during apply_kills_and_changes(); fail early with a
    // diagnosable error instead.
    if (!access_node) {
        throw std::runtime_error(
            "Unsupported: malloc node " + std::to_string(malloc.element_id()) +
            " whose output destination is not an access node"
        );
    }
    generated_.emplace(malloc.element_id(), std::make_unique<OffloadHolder>(&malloc, access_node, memlet));
}
104

105
void OffloadState::found_offload_node(Block& block, offloading::DataOffloadingNode& offload) {
23✔
106
    auto& dflow = block.dataflow();
23✔
107

108
    bool src_is_dev = false;
23✔
109
    bool src_is_host = false;
23✔
110
    bool dst_is_dev = false;
23✔
111
    bool dst_is_host = false;
23✔
112

113
    bool starts_dev_lifetime = false;
23✔
114
    bool ends_dev_lifetime = false;
23✔
115
    bool updates_on_dev = false;
23✔
116
    bool updates_on_host = false;
23✔
117

118
    auto transfer_direction = offload.transfer_direction();
23✔
119
    auto lifecycle = offload.buffer_lifecycle();
23✔
120
    if (transfer_direction == offloading::DataTransferDirection::D2H) {
23✔
121
        src_is_dev = true;
9✔
122
        dst_is_host = true;
9✔
123
        updates_on_host = true;
9✔
124
        if (lifecycle == offloading::BufferLifecycle::FREE) {
9✔
125
            ends_dev_lifetime = true;
8✔
126
        }
8✔
127
    } else if (transfer_direction == offloading::DataTransferDirection::H2D) {
14✔
128
        src_is_host = true;
11✔
129
        dst_is_dev = true;
11✔
130
        updates_on_dev = true;
11✔
131
        if (lifecycle == offloading::BufferLifecycle::ALLOC) {
11✔
132
            starts_dev_lifetime = true;
11✔
133
        }
11✔
134
    } else if (offloading::is_NONE(transfer_direction)) {
11✔
135
        if (lifecycle == offloading::BufferLifecycle::ALLOC) {
3✔
NEW
136
            starts_dev_lifetime = true;
×
NEW
137
            dst_is_dev = true;
×
138
        } else if (lifecycle == offloading::BufferLifecycle::FREE) {
3✔
139
            ends_dev_lifetime = true;
3✔
140
            src_is_dev = true;
3✔
141
        }
3✔
142
    }
3✔
143

144
    const data_flow::AccessNode* found_dev_access = nullptr;
23✔
145
    const data_flow::AccessNode* found_host_access = nullptr;
23✔
146
    const data_flow::Memlet* found_host_memlet = nullptr;
23✔
147

148
    for (auto& conn : offload.inputs()) {
23✔
149
        auto* memlet = dflow.in_edge_for_connector(offload, conn);
23✔
150
        auto* access_node = dynamic_cast<const data_flow::AccessNode*>(&memlet->src());
23✔
151

152
        if (src_is_host) {
23✔
153
            found_host_access = access_node;
11✔
154
            found_host_memlet = memlet;
11✔
155
        }
11✔
156
        if (src_is_dev) {
23✔
157
            found_dev_access = access_node;
12✔
158
        }
12✔
159
    }
23✔
160

161
    for (auto& conn : offload.outputs()) {
23✔
162
        auto edges = dflow.out_edges_for_connector(offload, conn);
23✔
163
        if (edges.size() > 1) {
23✔
164
            throw std::runtime_error(
×
165
                "Unsupported: offload node " + std::to_string(offload.element_id()) +
×
166
                " with multiple outputs edges on " + conn
×
167
            );
×
168
        }
×
169
        auto* memlet = edges.at(0);
23✔
170
        auto* access_node = dynamic_cast<const data_flow::AccessNode*>(&memlet->dst());
23✔
171

172
        if (dst_is_host) {
23✔
173
            found_host_access = access_node;
9✔
174
            found_host_memlet = memlet;
9✔
175
        }
9✔
176
        if (dst_is_dev) {
23✔
177
            found_dev_access = access_node;
11✔
178
        }
11✔
179
    }
23✔
180

181
    if (ends_dev_lifetime || updates_on_host) {
23✔
182
        generated_.emplace(
12✔
183
            offload.element_id(),
12✔
184
            std::make_unique<OffloadHolder>(
12✔
185
                &offload,
12✔
186
                found_host_access,
12✔
187
                found_host_memlet,
12✔
188
                found_dev_access,
12✔
189
                starts_dev_lifetime,
12✔
190
                ends_dev_lifetime,
12✔
191
                updates_on_dev,
12✔
192
                updates_on_host
12✔
193
            )
12✔
194
        );
12✔
195
    } else if (starts_dev_lifetime || updates_on_dev) {
12✔
196
        add_h2d_entry(OffloadHolder{
11✔
197
            &offload,
11✔
198
            found_host_access,
11✔
199
            found_host_memlet,
11✔
200
            found_dev_access,
11✔
201
            starts_dev_lifetime,
11✔
202
            ends_dev_lifetime,
11✔
203
            updates_on_dev,
11✔
204
            updates_on_host
11✔
205
        });
11✔
206
    }
11✔
207
}
23✔
208

209
void OffloadState::add_h2d_entry(const OffloadHolder& entry) {
11✔
210
    h2d_nodes_.emplace(entry.offload_node->element_id(), std::make_unique<OffloadHolder>(entry));
11✔
211
    // todo also need to remove generated ones killed by this. But right now, only max 1 per block anyway
212
}
11✔
213

214
/**
 * Transfer function of this block's state: takes the set of offloads exposed
 * by the predecessors and applies this block's reads, kills and newly
 * generated offloads to it, reporting elimination candidates to the collector
 * along the way.
 */
void OffloadState::apply_kills_and_changes(ExposedType& exposed) const {
    // A full barrier discards every in-flight offload unconditionally.
    if (full_kill_) {
        exposed.clear();
        return;
    }
    std::list<ExposedOffload> dynamic_inserts;
    for (auto it = exposed.begin(); it != exposed.end();) {
        auto& [id, exposedOffload] = *it;
        auto& holder = *exposedOffload.offload;

        // Reads of the in-flight container: kill untouched mallocs, otherwise count the use.
        auto container_reads = reads_.find(exposedOffload.container);
        if (container_reads != reads_.end() && !container_reads->second.empty()) {
            if (holder.malloc_node) { // mallocs are just killed on first use
                DEBUG_PRINTLN(
                    "In-flight malloc area of #" << holder.malloc_node->element_id()
                                                 << " is read without being initialized!"
                );
                it = exposed.erase(it);
                continue;
            } else { // track if a live var is read
                ++exposedOffload.read_count;
            }
        }

        // Writes/escapes of the container recorded in this block kill the offload.
        if (kills_containers_.contains(exposedOffload.container)) {
            it = exposed.erase(it);
            continue;
        }

        // Check whether one of this block's own offload nodes kills it, and
        // report the recognizable elimination patterns to the collector.
        auto [kill_type, killing_entry] = find_killing_entry_node(exposedOffload);
        if (kill_type != KillingType::None) {
            if (kill_type == KillingType::DeviceReuse) {
                collector_.found_transfer_reuse_pair(exposedOffload, *killing_entry);
            } else if (kill_type == KillingType::EmptyHostMalloc) {
                collector_.found_empty_host_malloc(exposedOffload, *killing_entry);
            } else if (kill_type == KillingType::DeviceFree) {
                // we have a on-device-alloc that survived without kills to the on-device-free
                // -> promote this to a host-relevant D2H point, that might be reused

                // replace the current H2D with the "D2H" that would allow it to live on
                // this creates a D2H-like exposedOffload, despite us not knowing the host-var at this point
                auto* host_data = holder.host_data;
                if (host_data) {
                    dynamic_inserts.emplace_back(killing_entry, host_data->data(), 0);
                    it = exposed.erase(it);
                    continue;
                }
            }
            it = exposed.erase(it);
            continue;
        }
        ++it;
    }

    // Expose this block's own generated offloads (host-updating or malloc).
    for (auto& [id, gen] : generated_) {
        auto* holder = gen.get();
        if (holder->updates_on_host || holder->malloc_node) { // block unidentified host-container ones from being
                                                              // exposed. If we could reconstruct, it will be a
                                                              // dynamic_insert
            exposed.insert({id, ExposedOffload{holder, holder->host_data->data(), 0}});
        }
    }
    // Insert the D2H-like promotions produced by the DeviceFree case above.
    for (auto& gen : dynamic_inserts) {
        exposed.insert({gen.offload->offload_node->element_id(), gen});
    }
    // Device-lifetime-starting H2D nodes are exposed under their device pointer.
    for (auto& [id, gen] : h2d_nodes_) {
        auto* holder = gen.get();
        if (holder->starts_dev_lifetime) {
            exposed.insert({id, ExposedOffload{holder, holder->dev_data->data(), 0}});
        }
    }
}
287

288

289
// Dispatches the library nodes relevant to this analysis — offloading nodes
// and plain host mallocs — to the block's state; other nodes only receive the
// base handling.
void DataTransferEliminationAnalysis::handle_lib_node(Block& block, data_flow::LibraryNode& node) {
    BaseUserVisitor::handle_lib_node(block, node);

    if (auto* offload = dynamic_cast<offloading::DataOffloadingNode*>(&node)) {
        get_or_create_state(block).found_offload_node(block, *offload);
        return;
    }
    if (auto* host_malloc = dynamic_cast<stdlib::MallocNode*>(&node)) {
        get_or_create_state(block).found_malloc(block, *host_malloc);
    }
}
298

299
// Hook invoked before a structured loop's body is visited. The offloaded-kernel
// detection below is currently disabled; only the base traversal runs.
void DataTransferEliminationAnalysis::handle_structured_loop_before_body(StructuredLoop& loop) {
    BaseUserVisitor::handle_structured_loop_before_body(loop);

    // auto* map = dynamic_cast<sdfg::structured_control_flow::Map*>(&loop);

    // if (map && map->schedule_type().category() == ScheduleTypeCategory::Offloader) {
    //     get_or_create_state(loop).found_offloaded_kernel(*map);
    // }
}
308

309
void DataTransferEliminationAnalysis::
310
    on_escape(const std::string& container, const ControlFlowNode* node, const Element* user) {
27✔
311
    if (dynamic_cast<const Block*>(node)) {
27✔
312
        if (auto* memlet = dynamic_cast<const data_flow::Memlet*>(user)) {
27✔
313
            if (auto* offload = dynamic_cast<const offloading::DataOffloadingNode*>(&memlet->dst())) {
27✔
314
                // accesses of offloading nodes are handled more intelligently in found_offload_node, so ignore them
315
                // here
316
                return;
23✔
317
            }
23✔
318
        }
27✔
319
    }
27✔
320
    get_or_create_state(*node).found_escape(container);
4✔
321
}
4✔
322

323
void DataTransferEliminationAnalysis::
324
    on_read_via(const std::string& container, const ControlFlowNode* node, const data_flow::Memlet* user) {
2✔
325
    if (!dynamic_cast<const offloading::DataOffloadingNode*>(&user->dst())) {
2✔
326
        get_or_create_state(*node).found_ptr_read(container, user);
2✔
327
    }
2✔
328
}
2✔
329

330
void DataTransferEliminationAnalysis::
331
    on_write_via(const std::string& container, const ControlFlowNode* node, const data_flow::Memlet* user) {
2✔
332
    if (!dynamic_cast<const offloading::DataOffloadingNode*>(&user->dst())) {
2✔
333
        get_or_create_state(*node).found_ptr_write(container, user);
2✔
334
    }
2✔
335
}
2✔
336

337
// Every control-flow node starts with a fresh, empty OffloadState that uses
// this analysis as its elimination-candidate collector.
std::unique_ptr<OffloadState> DataTransferEliminationAnalysis::
    create_initial_state(const structured_control_flow::ControlFlowNode& node) {
    auto state = std::make_unique<OffloadState>(*this);
    return state;
}
341

342
void DataTransferEliminationAnalysis::run() {
7✔
343
    dispatch(sdfg_.root());
7✔
344

345
    run_forward(sdfg_.root());
7✔
346
}
7✔
347

348
} // namespace sdfg::analysis
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc