• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 24251733303

10 Apr 2026 03:55PM UTC coverage: 64.159% (-0.2%) from 64.376%
24251733303

Pull #673

github

web-flow
Merge b555ef1c7 into ccfd1d376
Pull Request #673: Data flow transfer elimination

381 of 542 new or added lines in 12 files covered. (70.3%)

112 existing lines in 3 files now uncovered.

30169 of 47022 relevant lines covered (64.16%)

578.46 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

18.77
/opt/src/passes/offloading/data_transfer_minimization_pass.cpp
1
#include "sdfg/passes/offloading/data_transfer_minimization_pass.h"
2

3
#include <cstddef>
4
#include <string>
5
#include <unordered_set>
6
#include <utility>
7

8
#include "sdfg/analysis/analysis.h"
9
#include "sdfg/analysis/data_transfer_elimination_analysis.h"
10
#include "sdfg/analysis/scope_analysis.h"
11
#include "sdfg/analysis/users.h"
12
#include "sdfg/data_flow/access_node.h"
13
#include "sdfg/data_flow/code_node.h"
14
#include "sdfg/data_flow/data_flow_graph.h"
15
#include "sdfg/data_flow/library_node.h"
16
#include "sdfg/data_flow/memlet.h"
17
#include "sdfg/data_flow/tasklet.h"
18
#include "sdfg/element.h"
19
#include "sdfg/exceptions.h"
20
#include "sdfg/helpers/helpers.h"
21
#include "sdfg/structured_control_flow/block.h"
22
#include "sdfg/structured_control_flow/control_flow_node.h"
23
#include "sdfg/structured_control_flow/sequence.h"
24
#include "sdfg/symbolic/symbolic.h"
25
#include "sdfg/targets/cuda/cuda_data_offloading_node.h"
26
#include "sdfg/targets/offloading/data_offloading_node.h"
27
#include "sdfg/targets/rocm/rocm_data_offloading_node.h"
28
#include "sdfg/types/pointer.h"
29
#include "sdfg/visitor/structured_sdfg_visitor.h"
30

31
namespace sdfg {
32
namespace passes {
33

34
// Default constructor: the pass needs no construction-time setup.
DataTransferMinimizationPass::DataTransferMinimizationPass() = default;
35

36
bool DataTransferMinimizationPass::eliminate_transfer(
37
    builder::StructuredSDFGBuilder& builder,
38
    const analysis::OffloadHolder& copy_out,
39
    const analysis::OffloadHolder& copy_in,
40
    bool remove_d2h
41
) {
2✔
42
    // Get all relevant information
43
    std::string copy_out_device_container = copy_out.dev_data->data();
2✔
44
    std::string copy_in_device_container = copy_in.dev_data->data();
2✔
45
    DebugInfo copy_out_src_debinfo = copy_out.dev_data->debug_info();
2✔
46
    DebugInfo copy_in_dst_debinfo = copy_in.dev_data->debug_info();
2✔
47

48
    // Remove what you can remove
49
    if (!remove_d2h && copy_out.node->is_free()) {
2✔
50
        copy_out.node->remove_free();
1✔
51
    } else if (remove_d2h) {
1✔
52
        auto* copy_out_block = dynamic_cast<structured_control_flow::Block*>(copy_out.node->get_parent().get_parent());
1✔
53
        builder.clear_code_node_legacy(*copy_out_block, *copy_out.node);
1✔
54
    }
1✔
55
    auto* copy_in_block = dynamic_cast<structured_control_flow::Block*>(copy_in.node->get_parent().get_parent());
2✔
56
    builder.clear_code_node_legacy(*copy_in_block, *copy_in.node);
2✔
57

58
    // Maps the device pointers if necessary
59
    if (copy_out_device_container != copy_in_device_container) {
2✔
NEW
60
        auto& container_type = builder.subject().type(copy_out_device_container);
×
NEW
61
        auto ref_type = container_type.clone();
×
NEW
62
        auto& in_access = builder.add_access(*copy_in_block, copy_out_device_container, copy_out_src_debinfo);
×
NEW
63
        auto& out_access = builder.add_access(*copy_in_block, copy_in_device_container, copy_in_dst_debinfo);
×
NEW
64
        builder.add_reference_memlet(
×
NEW
65
            *copy_in_block,
×
NEW
66
            in_access,
×
NEW
67
            out_access,
×
NEW
68
            {symbolic::zero()},
×
NEW
69
            *ref_type,
×
NEW
70
            DebugInfo::merge(copy_out.node->debug_info(), copy_in.node->debug_info())
×
NEW
71
        );
×
NEW
72
    }
×
73

74
    return true;
2✔
75
}
2✔
76

77
bool DataTransferMinimizationPass::
78
    run_pass(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
3✔
79
    analysis::DataTransferEliminationAnalysis transfer_analysis(builder.subject(), analysis_manager);
3✔
80
    transfer_analysis.run();
3✔
81
    auto& candidates = transfer_analysis.candidates();
3✔
82

83
    auto& users = analysis_manager.get<analysis::Users>();
3✔
84

85
    int removed = 0;
3✔
86

87
    for (auto& candidate : candidates) {
3✔
88
        auto reads = candidate.first.read_count;
2✔
89
        auto& copy_out = *candidate.first.offload;
2✔
90
        auto& copy_in = candidate.second;
2✔
91
        auto& copy_in_container = copy_in.host_data->data();
2✔
92

93
        // copy from legacy version as hack: checking for users after the copy_in container (because current analysis
94
        // stops looking at that point)
95
        // TODO unsafe: this does not cover all ways that still need the data on host. Safe is: only manage device-side
96
        // things here and let dead-data find the unused host stuff
97
        auto* read = users.get_user(
2✔
98
            copy_in.host_data->data(), const_cast<data_flow::AccessNode*>(copy_in.host_data), analysis::Use::READ
2✔
99
        );
2✔
100

101
        for (auto* after_use : users.all_uses_after(*read)) {
4✔
102
            if (after_use->container() == copy_in_container && after_use->use() == analysis::Use::READ &&
4✔
103
                after_use != read) {
4✔
104
                ++reads;
1✔
105
            }
1✔
106
        }
4✔
107

108
#ifndef NDEBUG
2✔
109
        std::cerr << "  Elim candidate "
2✔
110
                  << "copy-out: #" << copy_out.node->element_id() << " " << copy_out.dev_data->data() << " -> "
2✔
111
                  << (copy_out.host_data ? copy_out.host_data->data() : "-") << " / ";
2✔
112
        if (reads) {
2✔
113
            std::cerr << reads << " reads / ";
1✔
114
        }
1✔
115
        std::cerr << "copy-in: #" << copy_in.node->element_id() << " "
2✔
116
                  << (copy_in.host_data ? copy_in.host_data->data() : "-") << " -> " << copy_in.dev_data->data()
2✔
117
                  << std::endl;
2✔
118
#endif
2✔
119

120
        bool success = eliminate_transfer(builder, copy_out, copy_in, reads == 0);
2✔
121

122
        if (success) {
2✔
123
            ++removed;
2✔
124
        }
2✔
125
    }
2✔
126

127
    return removed > 0;
3✔
128
}
3✔
129

130
// Forwards the builder and the analysis manager to the non-stopping visitor base class.
DataTransferMinimizationLegacy::DataTransferMinimizationLegacy(
    builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager
)
    : visitor::NonStoppingStructuredSDFGVisitor(builder, analysis_manager) {
}
133

NEW
134
bool DataTransferMinimizationLegacy::visit() {
×
UNCOV
135
    DEBUG_PRINTLN("Running DataTransferMinimizationPass on " << this->builder_.subject().name());
×
UNCOV
136
    return visitor::NonStoppingStructuredSDFGVisitor::visit();
×
UNCOV
137
}
×
138

NEW
139
bool DataTransferMinimizationLegacy::accept(structured_control_flow::Sequence& sequence) {
×
UNCOV
140
    bool applied = false;
×
UNCOV
141
    offloading::DataOffloadingNode* copy_out = nullptr;
×
UNCOV
142
    structured_control_flow::Block* copy_out_block = nullptr;
×
UNCOV
143
    size_t copy_out_index = 0;
×
144

145
    // While a copy-out can be found:
UNCOV
146
    while (copy_out_index < sequence.size()) {
×
147
        // Find a new copy-out
UNCOV
148
        for (; copy_out_index < sequence.size(); copy_out_index++) {
×
UNCOV
149
            if (auto* block = dynamic_cast<structured_control_flow::Block*>(&sequence.at(copy_out_index).first)) {
×
UNCOV
150
                if (block->dataflow().library_nodes().size() == 1 && block->dataflow().tasklets().size() == 0) {
×
UNCOV
151
                    auto* libnode = *block->dataflow().library_nodes().begin();
×
UNCOV
152
                    if (auto* offloading_node = dynamic_cast<offloading::DataOffloadingNode*>(libnode)) {
×
UNCOV
153
                        if (offloading_node->is_d2h()) {
×
UNCOV
154
                            copy_out = offloading_node;
×
UNCOV
155
                            copy_out_block = block;
×
UNCOV
156
                            break;
×
UNCOV
157
                        }
×
UNCOV
158
                    }
×
UNCOV
159
                }
×
UNCOV
160
            }
×
UNCOV
161
        }
×
162

163
        // Find a matching copy-in
UNCOV
164
        size_t i;
×
UNCOV
165
        for (i = copy_out_index; i < sequence.size(); i++) {
×
166
            // Child must be a block
UNCOV
167
            auto* copy_in_block = dynamic_cast<structured_control_flow::Block*>(&sequence.at(i).first);
×
UNCOV
168
            if (!copy_in_block) {
×
UNCOV
169
                continue;
×
UNCOV
170
            }
×
171

172
            // Block must contain exactly one library node
UNCOV
173
            if (copy_in_block->dataflow().library_nodes().size() != 1 ||
×
UNCOV
174
                copy_in_block->dataflow().tasklets().size() != 0) {
×
UNCOV
175
                continue;
×
UNCOV
176
            }
×
177

178
            // Library node must be an offloading node
UNCOV
179
            auto* copy_in =
×
UNCOV
180
                dynamic_cast<offloading::DataOffloadingNode*>(*copy_in_block->dataflow().library_nodes().begin());
×
UNCOV
181
            if (!copy_in) {
×
182
                continue;
×
183
            }
×
184

185
            // Offloading node must be a copy-in
UNCOV
186
            if (!copy_in->is_h2d()) {
×
UNCOV
187
                continue;
×
UNCOV
188
            }
×
189

190
            // Copy-in and copy-out must be redundant
UNCOV
191
            if (!copy_out->redundant_with(*copy_in)) {
×
UNCOV
192
                continue;
×
UNCOV
193
            }
×
194

195
            // Get src and dst access nodes for copy-in & -out
UNCOV
196
            auto [copy_out_src, copy_out_dst] = this->get_src_and_dst(copy_out_block->dataflow(), copy_out);
×
UNCOV
197
            auto [copy_in_src, copy_in_dst] = this->get_src_and_dst(copy_in_block->dataflow(), copy_in);
×
198

199
            // Get the write and read users
UNCOV
200
            auto& users = this->analysis_manager_.get<analysis::Users>();
×
UNCOV
201
            analysis::User* write = users.get_user(copy_out_dst->data(), copy_out_dst, analysis::Use::WRITE);
×
UNCOV
202
            if (!write) {
×
203
                continue;
×
204
            }
×
UNCOV
205
            analysis::User* read = users.get_user(copy_in_src->data(), copy_in_src, analysis::Use::READ);
×
UNCOV
206
            if (!read) {
×
207
                continue;
×
208
            }
×
209

UNCOV
210
            if (copy_out_dst->data() == copy_in_src->data()) {
×
211
                // Ensure that the container is not written between the data transfer nodes
UNCOV
212
                bool used_between = false;
×
UNCOV
213
                for (auto* user : users.all_uses_between(*write, *read)) {
×
214
                    if (user->container() == copy_out_dst->data() && user->use() != analysis::Use::READ) {
×
215
                        used_between = true;
×
216
                        break;
×
217
                    }
×
218
                }
×
UNCOV
219
                if (used_between) {
×
220
                    continue;
×
221
                }
×
UNCOV
222
            } else {
×
223
                if (!this->check_container_dependency(
×
224
                        copy_out_block, copy_out_dst->data(), copy_in_block, copy_in_src->data()
×
225
                    )) {
×
226
                    continue;
×
227
                }
×
228
            }
×
229

230
            // Check that the container is not written after the data transfer nodes
UNCOV
231
            bool read_after = false;
×
UNCOV
232
            for (auto* user : users.all_uses_after(*write)) {
×
UNCOV
233
                if (user->container() == copy_out_dst->data() && user->use() == analysis::Use::READ && user != read) {
×
UNCOV
234
                    read_after = true;
×
UNCOV
235
                    break;
×
UNCOV
236
                }
×
UNCOV
237
            }
×
238

239
            // Debug output
UNCOV
240
            DEBUG_PRINTLN(
×
NEW
241
                "  Eliminating " << (read_after ? "(" : "") << "copy-out: #" << copy_out->element_id() << " "
×
NEW
242
                                 << copy_out_src->data() << " -> " << copy_out_dst->data() << (read_after ? ")" : "")
×
NEW
243
                                 << " / copy-in: #" << copy_in->element_id() << " " << copy_in_src->data() << " -> "
×
NEW
244
                                 << copy_in_dst->data()
×
UNCOV
245
            );
×
246

247
            // Get all relevant information
UNCOV
248
            std::string copy_out_device_container = copy_out_src->data();
×
UNCOV
249
            std::string copy_in_device_container = copy_in_dst->data();
×
UNCOV
250
            DebugInfo copy_out_src_debinfo = copy_out_src->debug_info();
×
UNCOV
251
            DebugInfo copy_in_dst_debinfo = copy_in_dst->debug_info();
×
252

253
            // Remove the data tranfers
UNCOV
254
            if (read_after && copy_out->is_free()) {
×
UNCOV
255
                copy_out->remove_free();
×
UNCOV
256
            } else if (!read_after) {
×
UNCOV
257
                this->builder_.clear_code_node_legacy(*copy_out_block, *copy_out);
×
UNCOV
258
            }
×
UNCOV
259
            this->builder_.clear_code_node_legacy(*copy_in_block, *copy_in);
×
260

261
            // Maps the device pointers if necessary
UNCOV
262
            if (copy_out_device_container != copy_in_device_container) {
×
263
                auto& container_type = this->builder_.subject().type(copy_out_device_container);
×
264
                auto ref_type = container_type.clone();
×
265
                auto& in_access =
×
266
                    this->builder_.add_access(*copy_in_block, copy_out_device_container, copy_out_src_debinfo);
×
267
                auto& out_access =
×
268
                    this->builder_.add_access(*copy_in_block, copy_in_device_container, copy_in_dst_debinfo);
×
269
                this->builder_.add_reference_memlet(
×
270
                    *copy_in_block,
×
271
                    in_access,
×
272
                    out_access,
×
273
                    {symbolic::zero()},
×
274
                    *ref_type,
×
275
                    DebugInfo::merge(copy_out->debug_info(), copy_in->debug_info())
×
276
                );
×
277
            }
×
278

279
            // Invalidate users analysis
UNCOV
280
            this->analysis_manager_.invalidate<analysis::Users>();
×
UNCOV
281
            applied = true;
×
UNCOV
282
            break;
×
UNCOV
283
        }
×
284

285
        // Skip if no matching copy-in was found
UNCOV
286
        if (i >= sequence.size()) {
×
UNCOV
287
            copy_out_index++;
×
UNCOV
288
        }
×
UNCOV
289
    }
×
290

UNCOV
291
    return applied;
×
UNCOV
292
}
×
293

294
std::pair<data_flow::AccessNode*, data_flow::AccessNode*> DataTransferMinimizationLegacy::
UNCOV
295
    get_src_and_dst(data_flow::DataFlowGraph& dfg, offloading::DataOffloadingNode* offloading_node) {
×
UNCOV
296
    if (!offloading_node->has_transfer()) {
×
297
        throw InvalidSDFGException(
×
298
            "DataTransferMinimization: Cannot get copy access nodes for offloading node without data transfers"
×
299
        );
×
300
    }
×
UNCOV
301
    data_flow::AccessNode *src, *dst;
×
UNCOV
302
    if (dynamic_cast<cuda::CUDADataOffloadingNode*>(offloading_node)) {
×
UNCOV
303
        src = this->get_in_access(offloading_node, "_src");
×
UNCOV
304
        dst = this->get_out_access(offloading_node, "_dst");
×
UNCOV
305
    } else if (dynamic_cast<rocm::ROCMDataOffloadingNode*>(offloading_node)) {
×
306
        src = this->get_in_access(offloading_node, "_src");
×
307
        dst = this->get_out_access(offloading_node, "_dst");
×
308
    } else {
×
309
        throw InvalidSDFGException(
×
310
            "DataTransferMinimization: Unknown offloading node encountered: " + offloading_node->code().value()
×
311
        );
×
312
    }
×
UNCOV
313
    return {src, dst};
×
UNCOV
314
}
×
315

316
// Returns the source of the first incoming edge of `node` whose destination connector equals
// `dst_conn`, cast to an access node. Yields nullptr if no such edge exists or if the source
// is not an access node.
data_flow::AccessNode* DataTransferMinimizationLegacy::
    get_in_access(data_flow::CodeNode* node, const std::string& dst_conn) {
    auto& graph = node->get_parent();
    for (auto& incoming : graph.in_edges(*node)) {
        if (incoming.dst_conn() != dst_conn) {
            continue;
        }
        return dynamic_cast<data_flow::AccessNode*>(&incoming.src());
    }
    return nullptr;
}
326

327
// Returns the destination of the first outgoing edge of `node` whose source connector equals
// `src_conn`, cast to an access node. Yields nullptr if no such edge exists or if the
// destination is not an access node.
data_flow::AccessNode* DataTransferMinimizationLegacy::
    get_out_access(data_flow::CodeNode* node, const std::string& src_conn) {
    auto& dfg = node->get_parent();
    for (auto& oedge : dfg.out_edges(*node)) {
        if (oedge.src_conn() == src_conn) {
            // Checked downcast (as in get_in_access): a destination that is not an access node
            // results in nullptr instead of an invalid pointer
            return dynamic_cast<data_flow::AccessNode*>(&oedge.dst());
        }
    }
    return nullptr;
}
337

338
bool DataTransferMinimizationLegacy::check_container_dependency(
339
    structured_control_flow::Block* copy_out_block,
340
    const std::string& copy_out_container,
341
    structured_control_flow::Block* copy_in_block,
342
    const std::string& copy_in_container
343
) {
×
344
    // Simplification: Assume blocks are in the same sequence
345
    auto& scope_analysis = this->analysis_manager_.get<analysis::ScopeAnalysis>();
×
346
    auto* copy_out_block_parent = scope_analysis.parent_scope(copy_out_block);
×
347
    auto* copy_in_block_parent = scope_analysis.parent_scope(copy_in_block);
×
348
    auto* sequence = dynamic_cast<structured_control_flow::Sequence*>(copy_out_block_parent);
×
349
    if (copy_out_block_parent != copy_in_block_parent || !sequence) {
×
350
        return false;
×
351
    }
×
352

353
    std::unordered_set<std::string> copy_out_container_captures, copy_in_container_parts;
×
354
    size_t start = sequence->index(*copy_out_block);
×
355
    size_t stop = sequence->index(*copy_in_block);
×
356
    for (size_t i = start + 1; i < stop; i++) {
×
357
        auto* block = dynamic_cast<structured_control_flow::Block*>(&sequence->at(i).first);
×
358
        if (!block) {
×
359
            continue;
×
360
        }
×
361

362
        auto& dfg = block->dataflow();
×
363
        for (auto* access_node : dfg.data_nodes()) {
×
364
            if (access_node->data() == copy_in_container) {
×
365
                // Only allow constant assignments
366
                for (auto& iedge : dfg.in_edges(*access_node)) {
×
367
                    auto* tasklet = dynamic_cast<data_flow::Tasklet*>(&iedge.src());
×
368
                    if (!tasklet || tasklet->code() != data_flow::TaskletCode::assign) {
×
369
                        continue;
×
370
                    }
×
371

372
                    auto& iedge2 = *dfg.in_edges(*tasklet).begin();
×
373
                    if (!dynamic_cast<data_flow::ConstantNode*>(&iedge2.src())) {
×
374
                        return false;
×
375
                    }
×
376
                }
×
377

378
                // Collect H2D container parts
379
                for (auto& oedge : dfg.out_edges(*access_node)) {
×
380
                    if (oedge.type() != data_flow::MemletType::Reference) {
×
381
                        continue;
×
382
                    }
×
383

384
                    auto* access_node2 = dynamic_cast<data_flow::AccessNode*>(&oedge.dst());
×
385
                    if (!access_node2) {
×
386
                        continue;
×
387
                    }
×
388

389
                    copy_in_container_parts.insert(access_node2->data());
×
390
                }
×
391
            } else if (access_node->data() == copy_out_container) {
×
392
                // Collect D2H container captures
393
                for (auto& oedge : dfg.out_edges(*access_node)) {
×
394
                    if (oedge.type() != data_flow::MemletType::Dereference_Dst) {
×
395
                        continue;
×
396
                    }
×
397

398
                    auto* access_node2 = dynamic_cast<data_flow::AccessNode*>(&oedge.dst());
×
399
                    if (!access_node2) {
×
400
                        continue;
×
401
                    }
×
402

403
                    copy_out_container_captures.insert(access_node2->data());
×
404
                }
×
405
            }
×
406
        }
×
407
    }
×
408

409
    // Find all matches between captures and parts
410
    size_t matches = 0;
×
411
    for (auto& capture : copy_out_container_captures) {
×
412
        for (auto& part : copy_in_container_parts) {
×
413
            if (capture == part) {
×
414
                matches++;
×
415
            }
×
416
        }
×
417
    }
×
418

419
    return (matches == 1);
×
420
}
×
421

422
} // namespace passes
423
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc