• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 27608666188

16 Jun 2026 09:41AM UTC coverage: 61.529% (-0.01%) from 61.54%
27608666188

Pull #767

github

web-flow
Merge fea7733d4 into 6b2e310be
Pull Request #767: Extending LoopAnalysis to provide data for partially perfect loop nests

333 of 398 new or added lines in 4 files covered. (83.67%)

6 existing lines in 1 file now uncovered.

36497 of 59317 relevant lines covered (61.53%)

1125.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

30.17
/opt/src/passes/offloading/data_transfer_minimization_pass.cpp
1
#include "sdfg/passes/offloading/data_transfer_minimization_pass.h"
2

3
#include <cstddef>
4
#include <string>
5
#include <unordered_set>
6
#include <utility>
7

8
#include "sdfg/analysis/analysis.h"
9
#include "sdfg/analysis/data_transfer_elimination_analysis.h"
10
#include "sdfg/analysis/users.h"
11
#include "sdfg/data_flow/access_node.h"
12
#include "sdfg/data_flow/code_node.h"
13
#include "sdfg/data_flow/data_flow_graph.h"
14
#include "sdfg/data_flow/library_node.h"
15
#include "sdfg/data_flow/memlet.h"
16
#include "sdfg/data_flow/tasklet.h"
17
#include "sdfg/element.h"
18
#include "sdfg/exceptions.h"
19
#include "sdfg/helpers/helpers.h"
20
#include "sdfg/structured_control_flow/block.h"
21
#include "sdfg/structured_control_flow/control_flow_node.h"
22
#include "sdfg/structured_control_flow/sequence.h"
23
#include "sdfg/symbolic/symbolic.h"
24
#include "sdfg/targets/cuda/cuda_data_offloading_node.h"
25
#include "sdfg/targets/offloading/data_offloading_node.h"
26
#include "sdfg/targets/rocm/rocm_data_offloading_node.h"
27
#include "sdfg/types/pointer.h"
28
#include "sdfg/visitor/structured_sdfg_visitor.h"
29

30
namespace sdfg {
31
namespace passes {
32

33
/// Default-constructs the pass; the constructor itself performs no work.
DataTransferMinimizationPass::DataTransferMinimizationPass() = default;
8✔
34

35
bool DataTransferMinimizationPass::eliminate_malloc_first_transfer(
36
    builder::StructuredSDFGBuilder& builder, analysis::OffloadHolder& malloc_holder, analysis::OffloadHolder& copy_in
37
) {
3✔
38
    // Get all relevant information
39
    std::string copy_in_device_container = copy_in.dev_data->data();
3✔
40
    DebugInfo copy_in_dst_debinfo = copy_in.dev_data->debug_info();
3✔
41

42
    // leave the malloc itself, because we have not proven yet that there is no more d2H transfer that needs it
43
    // DDE needs to be able to find it
44

45
    auto* h2d_block = dynamic_cast<structured_control_flow::Block*>(copy_in.offload_node->get_parent().get_parent());
3✔
46
    builder.remove_memlet(*h2d_block, *copy_in.host_access);
3✔
47
    builder.remove_node(*h2d_block, *copy_in.host_data);
3✔
48
    copy_in.remove_h2d_parts();
3✔
49
    copy_in.offload_node->remove_h2d();
3✔
50

51
    return true;
3✔
52
}
3✔
53

54
bool DataTransferMinimizationPass::eliminate_redundant_d2h(
55
    builder::StructuredSDFGBuilder& builder, analysis::OffloadHolder& h2d, analysis::OffloadHolder& d2h
56
) {
4✔
57
    // Get all relevant information
58
    std::string copy_out_device_container = d2h.dev_data->data();
4✔
59
    DebugInfo copy_out_dst_debinfo = d2h.dev_data->debug_info();
4✔
60

61
    // leave the malloc itself, because we have not proven yet that there is no more d2H transfer that needs it
62
    // DDE needs to be able to find it
63

64
    auto* d2h_block = dynamic_cast<structured_control_flow::Block*>(d2h.offload_node->get_parent().get_parent());
4✔
65
    if (d2h.offload_node->is_d2h()) {
4✔
66
        if (d2h.offload_node->is_free()) {
4✔
67
            std::string out_conn = d2h.host_access->src_conn();
4✔
68
            auto access_type = d2h.host_access->base_type().clone();
4✔
69
            builder.remove_memlet(*d2h_block, *d2h.host_access);
4✔
70
            builder.remove_node(*d2h_block, *d2h.host_data);
4✔
71
            d2h.offload_node->remove_d2h();
4✔
72
        } else { // remove it entirely
4✔
73
            builder.clear_code_node_legacy(*d2h_block, *d2h.offload_node);
×
74
        }
×
75
        d2h.remove_d2h_parts();
4✔
76
        return true;
4✔
77
    } else {
4✔
78
        return false;
×
79
    }
×
80
}
4✔
81

82
bool DataTransferMinimizationPass::eliminate_transfer_pair(
83
    builder::StructuredSDFGBuilder& builder,
84
    analysis::OffloadHolder& copy_out,
85
    analysis::OffloadHolder& copy_in,
86
    bool remove_d2h
87
) {
7✔
88
    // Get all relevant information
89
    std::string copy_out_device_container = copy_out.dev_data->data();
7✔
90
    std::string copy_in_device_container = copy_in.dev_data->data();
7✔
91
    DebugInfo copy_out_src_debinfo = copy_out.dev_data->debug_info();
7✔
92
    DebugInfo copy_in_dst_debinfo = copy_in.dev_data->debug_info();
7✔
93
    DebugInfo copy_out_off_debinfo = copy_out.offload_node->debug_info();
7✔
94
    DebugInfo copy_in_off_debinfo = copy_in.offload_node->debug_info();
7✔
95

96
    bool remove_entirely = false;
7✔
97
    // Remove what you can remove
98
    if (!remove_d2h && copy_out.offload_node->is_free()) {
7✔
99
        if (copy_out.offload_node->is_d2h()) {
7✔
100
            copy_out.offload_node->remove_free();
3✔
101
        } else {
4✔
102
            remove_entirely = true;
4✔
103
        }
4✔
104
    } else if (remove_d2h) {
7✔
105
        remove_entirely = true;
×
106
    }
×
107
    if (remove_entirely) {
7✔
108
        auto* copy_out_block =
4✔
109
            dynamic_cast<structured_control_flow::Block*>(copy_out.offload_node->get_parent().get_parent());
4✔
110
        builder.clear_code_node_legacy(*copy_out_block, *copy_out.offload_node);
4✔
111
    }
4✔
112

113
    auto* copy_in_block = dynamic_cast<structured_control_flow::Block*>(copy_in.offload_node->get_parent().get_parent()
7✔
114
    );
7✔
115
    builder.clear_code_node_legacy(*copy_in_block, *copy_in.offload_node);
7✔
116

117
    // Maps the device pointers if necessary
118
    if (copy_out_device_container != copy_in_device_container) {
7✔
119
        auto& container_type = builder.subject().type(copy_out_device_container);
×
120
        auto ref_type = container_type.clone();
×
121
        auto& in_access = builder.add_access(*copy_in_block, copy_out_device_container, copy_out_src_debinfo);
×
122
        auto& out_access = builder.add_access(*copy_in_block, copy_in_device_container, copy_in_dst_debinfo);
×
123
        builder.add_reference_memlet(
×
124
            *copy_in_block,
×
125
            in_access,
×
126
            out_access,
×
127
            {symbolic::zero()},
×
128
            *ref_type,
×
NEW
129
            DebugInfo::merge(copy_out_off_debinfo, copy_in_off_debinfo)
×
130
        );
×
131
    }
×
132

133
    return true;
7✔
134
}
7✔
135

136
bool DataTransferMinimizationPass::
137
    run_pass(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
8✔
138
    analysis::DataTransferEliminationAnalysis transfer_analysis(builder.subject(), analysis_manager);
8✔
139
    transfer_analysis.run();
8✔
140

141
    int removed = 0;
8✔
142

143
    for (auto& [malloc_cand, first_h2d] : transfer_analysis.empty_malloc_candidates()) {
8✔
144
        auto& malloc_holder = *malloc_cand.offload;
3✔
145

146
        DEBUG_PRINTLN(
3✔
147
            "  Elim malloc: " << "#" << malloc_holder.malloc_node->element_id() << " -> "
3✔
148
                              << (malloc_holder.host_data ? malloc_holder.host_data->data() : "-") << " / "
3✔
149
                              << "h2d+malloc: #" << first_h2d.offload_node->element_id() << " "
3✔
150
                              << (first_h2d.host_data ? first_h2d.host_data->data() : "-") << " -> "
3✔
151
                              << first_h2d.dev_data->data()
3✔
152
        );
3✔
153

154
        bool success = eliminate_malloc_first_transfer(builder, malloc_holder, first_h2d);
3✔
155

156
        if (success) {
3✔
157
            ++removed;
3✔
158
        }
3✔
159
    }
3✔
160

161
    for (auto& [h2d_cand, redundant_d2h] : transfer_analysis.redundant_d2h_candidates()) {
8✔
162
        auto& h2d_holder = *h2d_cand.offload;
6✔
163

164
        if (h2d_holder.updates_on_dev) {
6✔
165
            DEBUG_PRINTLN(
4✔
166
                "  Elim h2d: " << "#" << h2d_holder.offload_node->element_id() << " -> " << h2d_holder.host_data->data()
4✔
167
                               << " -> " << h2d_holder.dev_data->data() << " / "
4✔
168
                               << "clean d2h: #" << redundant_d2h.offload_node->element_id() << " "
4✔
169
                               << redundant_d2h.dev_data->data() << " -> " << redundant_d2h.host_data->data()
4✔
170
            );
4✔
171
            bool success = eliminate_redundant_d2h(builder, h2d_holder, redundant_d2h);
4✔
172

173
            if (success) {
4✔
174
                ++removed;
4✔
175
            }
4✔
176
        }
4✔
177
    }
6✔
178

179
    for (auto& candidate : transfer_analysis.transfer_reuse_candidates()) {
8✔
180
        auto reads = candidate.first.read_count;
7✔
181
        auto& copy_out = *candidate.first.offload;
7✔
182
        auto& copy_in = candidate.second;
7✔
183
        auto& copy_in_container = copy_in.host_data->data();
7✔
184

185
        DEBUG_PRINTLN(
7✔
186
            "  Elim hd2: #" << copy_out.offload_node->element_id() << " " << copy_out.dev_data->data() << " -> "
7✔
187
                            << (copy_out.host_data ? copy_out.host_data->data() : "-") << " / "
7✔
188
                            << "d2h: #" << copy_in.offload_node->element_id() << " "
7✔
189
                            << (copy_in.host_data ? copy_in.host_data->data() : "-") << " -> "
7✔
190
                            << copy_in.dev_data->data()
7✔
191
        );
7✔
192

193
        bool success = eliminate_transfer_pair(builder, copy_out, copy_in, false);
7✔
194

195
        if (success) {
7✔
196
            ++removed;
7✔
197
        }
7✔
198
    }
7✔
199

200
    return removed > 0;
8✔
201
}
8✔
202

203
/// Forwards the builder and the analysis manager to the non-stopping visitor base class.
DataTransferMinimizationLegacy::DataTransferMinimizationLegacy(
    builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager
)
    : visitor::NonStoppingStructuredSDFGVisitor(builder, analysis_manager) {
}
×
206

207
bool DataTransferMinimizationLegacy::visit() {
×
208
    DEBUG_PRINTLN("Running DataTransferMinimizationPass on " << this->builder_.subject().name());
×
209
    return visitor::NonStoppingStructuredSDFGVisitor::visit();
×
210
}
×
211

212
bool DataTransferMinimizationLegacy::accept(structured_control_flow::Sequence& sequence) {
×
213
    bool applied = false;
×
214
    offloading::DataOffloadingNode* copy_out = nullptr;
×
215
    structured_control_flow::Block* copy_out_block = nullptr;
×
216
    size_t copy_out_index = 0;
×
217

218
    // While a copy-out can be found:
219
    while (copy_out_index < sequence.size()) {
×
220
        // Find a new copy-out
221
        for (; copy_out_index < sequence.size(); copy_out_index++) {
×
222
            if (auto* block = dynamic_cast<structured_control_flow::Block*>(&sequence.at(copy_out_index).first)) {
×
223
                if (block->dataflow().library_nodes().size() == 1 && block->dataflow().tasklets().size() == 0) {
×
224
                    auto* libnode = *block->dataflow().library_nodes().begin();
×
225
                    if (auto* offloading_node = dynamic_cast<offloading::DataOffloadingNode*>(libnode)) {
×
226
                        if (offloading_node->is_d2h()) {
×
227
                            copy_out = offloading_node;
×
228
                            copy_out_block = block;
×
229
                            break;
×
230
                        }
×
231
                    }
×
232
                }
×
233
            }
×
234
        }
×
235

236
        // Find a matching copy-in
237
        size_t i;
×
238
        for (i = copy_out_index; i < sequence.size(); i++) {
×
239
            // Child must be a block
240
            auto* copy_in_block = dynamic_cast<structured_control_flow::Block*>(&sequence.at(i).first);
×
241
            if (!copy_in_block) {
×
242
                continue;
×
243
            }
×
244

245
            // Block must contain exactly one library node
246
            if (copy_in_block->dataflow().library_nodes().size() != 1 ||
×
247
                copy_in_block->dataflow().tasklets().size() != 0) {
×
248
                continue;
×
249
            }
×
250

251
            // Library node must be an offloading node
252
            auto* copy_in =
×
253
                dynamic_cast<offloading::DataOffloadingNode*>(*copy_in_block->dataflow().library_nodes().begin());
×
254
            if (!copy_in) {
×
255
                continue;
×
256
            }
×
257

258
            // Offloading node must be a copy-in
259
            if (!copy_in->is_h2d()) {
×
260
                continue;
×
261
            }
×
262

263
            // Copy-in and copy-out must be redundant
264
            if (!copy_out->redundant_with(*copy_in)) {
×
265
                continue;
×
266
            }
×
267

268
            // Get src and dst access nodes for copy-in & -out
269
            auto [copy_out_src, copy_out_dst] = this->get_src_and_dst(copy_out_block->dataflow(), copy_out);
×
270
            auto [copy_in_src, copy_in_dst] = this->get_src_and_dst(copy_in_block->dataflow(), copy_in);
×
271

272
            // Get the write and read users
273
            auto& users = this->analysis_manager_.get<analysis::Users>();
×
274
            analysis::User* write = users.get_user(copy_out_dst->data(), copy_out_dst, analysis::Use::WRITE);
×
275
            if (!write) {
×
276
                continue;
×
277
            }
×
278
            analysis::User* read = users.get_user(copy_in_src->data(), copy_in_src, analysis::Use::READ);
×
279
            if (!read) {
×
280
                continue;
×
281
            }
×
282

283
            if (copy_out_dst->data() == copy_in_src->data()) {
×
284
                // Ensure that the container is not written between the data transfer nodes
285
                bool used_between = false;
×
286
                for (auto* user : users.all_uses_between(*write, *read)) {
×
287
                    if (user->container() == copy_out_dst->data() && user->use() != analysis::Use::READ) {
×
288
                        used_between = true;
×
289
                        break;
×
290
                    }
×
291
                }
×
292
                if (used_between) {
×
293
                    continue;
×
294
                }
×
295
            } else {
×
296
                if (!this->check_container_dependency(
×
297
                        copy_out_block, copy_out_dst->data(), copy_in_block, copy_in_src->data()
×
298
                    )) {
×
299
                    continue;
×
300
                }
×
301
            }
×
302

303
            // Check that the container is not written after the data transfer nodes
304
            bool read_after = false;
×
305
            for (auto* user : users.all_uses_after(*write)) {
×
306
                if (user->container() == copy_out_dst->data() && user->use() == analysis::Use::READ && user != read) {
×
307
                    read_after = true;
×
308
                    break;
×
309
                }
×
310
            }
×
311

312
            // Debug output
313
            DEBUG_PRINTLN(
×
314
                "  Eliminating " << (read_after ? "(" : "") << "copy-out: #" << copy_out->element_id() << " "
×
315
                                 << copy_out_src->data() << " -> " << copy_out_dst->data() << (read_after ? ")" : "")
×
316
                                 << " / copy-in: #" << copy_in->element_id() << " " << copy_in_src->data() << " -> "
×
317
                                 << copy_in_dst->data()
×
318
            );
×
319

320
            // Get all relevant information
321
            std::string copy_out_device_container = copy_out_src->data();
×
322
            std::string copy_in_device_container = copy_in_dst->data();
×
323
            DebugInfo copy_out_src_debinfo = copy_out_src->debug_info();
×
324
            DebugInfo copy_in_dst_debinfo = copy_in_dst->debug_info();
×
325

326
            // Remove the data tranfers
327
            if (read_after && copy_out->is_free()) {
×
328
                copy_out->remove_free();
×
329
            } else if (!read_after) {
×
330
                this->builder_.clear_code_node_legacy(*copy_out_block, *copy_out);
×
331
            }
×
332
            this->builder_.clear_code_node_legacy(*copy_in_block, *copy_in);
×
333

334
            // Maps the device pointers if necessary
335
            if (copy_out_device_container != copy_in_device_container) {
×
336
                auto& container_type = this->builder_.subject().type(copy_out_device_container);
×
337
                auto ref_type = container_type.clone();
×
338
                auto& in_access =
×
339
                    this->builder_.add_access(*copy_in_block, copy_out_device_container, copy_out_src_debinfo);
×
340
                auto& out_access =
×
341
                    this->builder_.add_access(*copy_in_block, copy_in_device_container, copy_in_dst_debinfo);
×
342
                this->builder_.add_reference_memlet(
×
343
                    *copy_in_block,
×
344
                    in_access,
×
345
                    out_access,
×
346
                    {symbolic::zero()},
×
347
                    *ref_type,
×
348
                    DebugInfo::merge(copy_out->debug_info(), copy_in->debug_info())
×
349
                );
×
350
            }
×
351

352
            // Invalidate users analysis
353
            this->analysis_manager_.invalidate<analysis::Users>();
×
354
            applied = true;
×
355
            break;
×
356
        }
×
357

358
        // Skip if no matching copy-in was found
359
        if (i >= sequence.size()) {
×
360
            copy_out_index++;
×
361
        }
×
362
    }
×
363

364
    return applied;
×
365
}
×
366

367
std::pair<data_flow::AccessNode*, data_flow::AccessNode*> DataTransferMinimizationLegacy::
368
    get_src_and_dst(data_flow::DataFlowGraph& dfg, offloading::DataOffloadingNode* offloading_node) {
×
369
    if (!offloading_node->has_transfer()) {
×
370
        throw InvalidSDFGException(
×
371
            "DataTransferMinimization: Cannot get copy access nodes for offloading node without data transfers"
×
372
        );
×
373
    }
×
374
    data_flow::AccessNode *src, *dst;
×
375
    if (dynamic_cast<cuda::CUDADataOffloadingNode*>(offloading_node)) {
×
376
        src = this->get_in_access(offloading_node, "_src");
×
377
        dst = this->get_out_access(offloading_node, "_dst");
×
378
    } else if (dynamic_cast<rocm::ROCMDataOffloadingNode*>(offloading_node)) {
×
379
        src = this->get_in_access(offloading_node, "_src");
×
380
        dst = this->get_out_access(offloading_node, "_dst");
×
381
    } else {
×
382
        throw InvalidSDFGException(
×
383
            "DataTransferMinimization: Unknown offloading node encountered: " + offloading_node->code().value()
×
384
        );
×
385
    }
×
386
    return {src, dst};
×
387
}
×
388

389
/**
 * Finds the access node feeding a given input connector of a code node.
 *
 * @param node     Code node whose incoming edges are searched.
 * @param dst_conn Name of the input connector on `node`.
 * @return Source of the first incoming edge ending in `dst_conn`, cast to an
 *         access node; nullptr if no such edge exists or the source is not an
 *         access node.
 */
data_flow::AccessNode* DataTransferMinimizationLegacy::
    get_in_access(data_flow::CodeNode* node, const std::string& dst_conn) {
    auto& graph = node->get_parent();
    for (auto& incoming : graph.in_edges(*node)) {
        if (incoming.dst_conn() != dst_conn) {
            continue;
        }
        return dynamic_cast<data_flow::AccessNode*>(&incoming.src());
    }
    return nullptr;
}
×
399

400
/**
 * Finds the access node fed by a given output connector of a code node.
 *
 * @param node     Code node whose outgoing edges are searched.
 * @param src_conn Name of the output connector on `node`.
 * @return Destination of the first outgoing edge starting at `src_conn`, cast to
 *         an access node; nullptr if no such edge exists or the destination is
 *         not an access node.
 */
data_flow::AccessNode* DataTransferMinimizationLegacy::
    get_out_access(data_flow::CodeNode* node, const std::string& src_conn) {
    auto& dfg = node->get_parent();
    for (auto& oedge : dfg.out_edges(*node)) {
        if (oedge.src_conn() == src_conn) {
            // Checked cast, mirroring get_in_access: an unchecked static_cast would be
            // undefined behaviour if the edge ends in anything other than an access node.
            return dynamic_cast<data_flow::AccessNode*>(&oedge.dst());
        }
    }
    return nullptr;
}
×
410

411
bool DataTransferMinimizationLegacy::check_container_dependency(
412
    structured_control_flow::Block* copy_out_block,
413
    const std::string& copy_out_container,
414
    structured_control_flow::Block* copy_in_block,
415
    const std::string& copy_in_container
416
) {
×
417
    // Simplification: Assume blocks are in the same sequence
418
    auto* copy_out_block_parent = copy_out_block->get_parent();
×
419
    auto* copy_in_block_parent = copy_in_block->get_parent();
×
420
    auto* sequence = dynamic_cast<structured_control_flow::Sequence*>(copy_out_block_parent);
×
421
    if (copy_out_block_parent != copy_in_block_parent || !sequence) {
×
422
        return false;
×
423
    }
×
424

425
    std::unordered_set<std::string> copy_out_container_captures, copy_in_container_parts;
×
426
    size_t start = sequence->index(*copy_out_block);
×
427
    size_t stop = sequence->index(*copy_in_block);
×
428
    for (size_t i = start + 1; i < stop; i++) {
×
429
        auto* block = dynamic_cast<structured_control_flow::Block*>(&sequence->at(i).first);
×
430
        if (!block) {
×
431
            continue;
×
432
        }
×
433

434
        auto& dfg = block->dataflow();
×
435
        for (auto* access_node : dfg.data_nodes()) {
×
436
            if (access_node->data() == copy_in_container) {
×
437
                // Only allow constant assignments
438
                for (auto& iedge : dfg.in_edges(*access_node)) {
×
439
                    auto* tasklet = dynamic_cast<data_flow::Tasklet*>(&iedge.src());
×
440
                    if (!tasklet || tasklet->code() != data_flow::TaskletCode::assign) {
×
441
                        continue;
×
442
                    }
×
443

444
                    auto& iedge2 = *dfg.in_edges(*tasklet).begin();
×
445
                    if (!dynamic_cast<data_flow::ConstantNode*>(&iedge2.src())) {
×
446
                        return false;
×
447
                    }
×
448
                }
×
449

450
                // Collect H2D container parts
451
                for (auto& oedge : dfg.out_edges(*access_node)) {
×
452
                    if (oedge.type() != data_flow::MemletType::Reference) {
×
453
                        continue;
×
454
                    }
×
455

456
                    auto* access_node2 = dynamic_cast<data_flow::AccessNode*>(&oedge.dst());
×
457
                    if (!access_node2) {
×
458
                        continue;
×
459
                    }
×
460

461
                    copy_in_container_parts.insert(access_node2->data());
×
462
                }
×
463
            } else if (access_node->data() == copy_out_container) {
×
464
                // Collect D2H container captures
465
                for (auto& oedge : dfg.out_edges(*access_node)) {
×
466
                    if (oedge.type() != data_flow::MemletType::Dereference_Dst) {
×
467
                        continue;
×
468
                    }
×
469

470
                    auto* access_node2 = dynamic_cast<data_flow::AccessNode*>(&oedge.dst());
×
471
                    if (!access_node2) {
×
472
                        continue;
×
473
                    }
×
474

475
                    copy_out_container_captures.insert(access_node2->data());
×
476
                }
×
477
            }
×
478
        }
×
479
    }
×
480

481
    // Find all matches between captures and parts
482
    size_t matches = 0;
×
483
    for (auto& capture : copy_out_container_captures) {
×
484
        for (auto& part : copy_in_container_parts) {
×
485
            if (capture == part) {
×
486
                matches++;
×
487
            }
×
488
        }
×
489
    }
×
490

491
    return (matches == 1);
×
492
}
×
493

494
} // namespace passes
495
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc