• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 27898444386

21 Jun 2026 08:18AM UTC coverage: 61.843% (-0.03%) from 61.87%
27898444386

push

github

web-flow
enable transfer elimination to work on simple host bounces (#790)

31 of 66 new or added lines in 5 files covered. (46.97%)

2 existing lines in 2 files now uncovered.

37038 of 59890 relevant lines covered (61.84%)

1018.08 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

29.63
/opt/src/passes/offloading/data_transfer_minimization_pass.cpp
1
#include "sdfg/passes/offloading/data_transfer_minimization_pass.h"
2

3
#include <cstddef>
4
#include <string>
5
#include <unordered_set>
6
#include <utility>
7

8
#include "sdfg/analysis/analysis.h"
9
#include "sdfg/analysis/data_transfer_elimination_analysis.h"
10
#include "sdfg/analysis/users.h"
11
#include "sdfg/data_flow/access_node.h"
12
#include "sdfg/data_flow/code_node.h"
13
#include "sdfg/data_flow/data_flow_graph.h"
14
#include "sdfg/data_flow/library_node.h"
15
#include "sdfg/data_flow/memlet.h"
16
#include "sdfg/data_flow/tasklet.h"
17
#include "sdfg/element.h"
18
#include "sdfg/exceptions.h"
19
#include "sdfg/helpers/helpers.h"
20
#include "sdfg/structured_control_flow/block.h"
21
#include "sdfg/structured_control_flow/control_flow_node.h"
22
#include "sdfg/structured_control_flow/sequence.h"
23
#include "sdfg/symbolic/symbolic.h"
24
#include "sdfg/targets/cuda/cuda_data_offloading_node.h"
25
#include "sdfg/targets/offloading/data_offloading_node.h"
26
#include "sdfg/targets/rocm/rocm_data_offloading_node.h"
27
#include "sdfg/types/pointer.h"
28
#include "sdfg/visitor/structured_sdfg_visitor.h"
29

30
namespace sdfg {
31
namespace passes {
32

33
// Default construction: no initialisation work is performed here.
DataTransferMinimizationPass::DataTransferMinimizationPass() = default;
8✔
34

35
bool DataTransferMinimizationPass::eliminate_malloc_first_transfer(
36
    builder::StructuredSDFGBuilder& builder, analysis::OffloadHolder& malloc_holder, analysis::OffloadHolder& copy_in
37
) {
3✔
38
    // Get all relevant information
39
    std::string copy_in_device_container = copy_in.dev_data->data();
3✔
40
    DebugInfo copy_in_dst_debinfo = copy_in.dev_data->debug_info();
3✔
41

42
    // leave the malloc itself, because we have not proven yet that there is no more d2H transfer that needs it
43
    // DDE needs to be able to find it
44

45
    auto* h2d_block = dynamic_cast<structured_control_flow::Block*>(copy_in.offload_node->get_parent().get_parent());
3✔
46
    builder.remove_memlet(*h2d_block, *copy_in.host_access);
3✔
47
    builder.remove_node(*h2d_block, *copy_in.host_data);
3✔
48
    copy_in.remove_h2d_parts();
3✔
49
    copy_in.offload_node->remove_h2d();
3✔
50

51
    return true;
3✔
52
}
3✔
53

54
bool DataTransferMinimizationPass::eliminate_redundant_d2h(
55
    builder::StructuredSDFGBuilder& builder, analysis::OffloadHolder& h2d, analysis::OffloadHolder& d2h
56
) {
4✔
57
    // Get all relevant information
58
    std::string copy_out_device_container = d2h.dev_data->data();
4✔
59
    DebugInfo copy_out_dst_debinfo = d2h.dev_data->debug_info();
4✔
60

61
    // leave the malloc itself, because we have not proven yet that there is no more d2H transfer that needs it
62
    // DDE needs to be able to find it
63

64
    auto* d2h_block = dynamic_cast<structured_control_flow::Block*>(d2h.offload_node->get_parent().get_parent());
4✔
65
    if (d2h.offload_node->is_d2h()) {
4✔
66
        if (d2h.offload_node->is_free()) {
4✔
67
            std::string out_conn = d2h.host_access->src_conn();
4✔
68
            auto access_type = d2h.host_access->base_type().clone();
4✔
69
            builder.remove_memlet(*d2h_block, *d2h.host_access);
4✔
70
            builder.remove_node(*d2h_block, *d2h.host_data);
4✔
71
            d2h.offload_node->remove_d2h();
4✔
72
        } else { // remove it entirely
4✔
73
            builder.clear_code_node_legacy(*d2h_block, *d2h.offload_node);
×
74
        }
×
75
        d2h.remove_d2h_parts();
4✔
76
        return true;
4✔
77
    } else {
4✔
78
        return false;
×
79
    }
×
80
}
4✔
81

82
bool DataTransferMinimizationPass::eliminate_transfer_pair(
83
    builder::StructuredSDFGBuilder& builder,
84
    analysis::OffloadHolder& copy_out,
85
    analysis::OffloadHolder& copy_in,
86
    const analysis::DataTransferEliminationAnalysis& transfer_analysis,
87
    analysis::ControlFlowAnalysis& cf_analysis,
88
    bool remove_d2h
89
) {
7✔
90
    // Get all relevant information
91
    std::string copy_out_device_container = copy_out.dev_data->data();
7✔
92
    std::string copy_in_device_container = copy_in.dev_data->data();
7✔
93
    DebugInfo copy_out_src_debinfo = copy_out.dev_data->debug_info();
7✔
94
    DebugInfo copy_in_dst_debinfo = copy_in.dev_data->debug_info();
7✔
95
    DebugInfo copy_out_off_debinfo = copy_out.offload_node->debug_info();
7✔
96
    DebugInfo copy_in_off_debinfo = copy_in.offload_node->debug_info();
7✔
97

98
    const bool aliases = copy_out_device_container != copy_in_device_container;
7✔
99

100
    // When the two device buffers differ we will alias `T = S` (T == copy_in's buffer, S == copy_out's). After that
101
    // both buffers name the same storage, so any deallocation of S and of T would free it twice. Reconcile the two
102
    // device frees up front: keep the one that post-dominates the other (it outlives every use of the shared storage)
103
    // and drop the dominated one. If the lifetimes are not ordered we cannot prove a single safe free, so we bail out
104
    // before mutating anything. OffloadTransform emits these frees locally and never reconciles them, which is the
105
    // double-free this guards against.
106
    const analysis::DeviceFreeSite* free_to_drop = nullptr;
7✔
107
    bool drop_free_via_copy_out = false; // the dominated free is fused into copy_out's D2H node
7✔
108
    if (aliases) {
7✔
NEW
109
        const auto* frees_s = transfer_analysis.device_frees(copy_out_device_container);
×
NEW
110
        const auto* frees_t = transfer_analysis.device_frees(copy_in_device_container);
×
NEW
111
        const bool s_freed = frees_s != nullptr && !frees_s->empty();
×
NEW
112
        const bool t_freed = frees_t != nullptr && !frees_t->empty();
×
NEW
113
        if (s_freed && t_freed) {
×
114
            // Only the unambiguous single-free-each topology is reconcilable here.
NEW
115
            if (frees_s->size() != 1 || frees_t->size() != 1) {
×
NEW
116
                return false;
×
NEW
117
            }
×
NEW
118
            const analysis::DeviceFreeSite& free_s = frees_s->front();
×
NEW
119
            const analysis::DeviceFreeSite& free_t = frees_t->front();
×
NEW
120
            if (cf_analysis.post_dominates(*free_t.block, *free_s.block)) {
×
NEW
121
                free_to_drop = &free_s; // keep T's (later) free
×
NEW
122
            } else if (cf_analysis.post_dominates(*free_s.block, *free_t.block)) {
×
NEW
123
                free_to_drop = &free_t; // keep S's (later) free
×
NEW
124
            } else {
×
NEW
125
                return false; // frees on incomparable paths: cannot prove a single safe deallocation
×
NEW
126
            }
×
127
            // If the dominated free is fused into copy_out's D2H node we must strip only the free part (the D2H itself
128
            // may still feed a live host buffer), handled below via remove_free().
NEW
129
            drop_free_via_copy_out = free_to_drop->node == copy_out.offload_node;
×
NEW
130
        }
×
131
        // If at most one of the buffers is freed there is no double free to reconcile.
NEW
132
    }
×
133

134
    bool remove_entirely = false;
7✔
135
    // Remove what you can remove
136
    if (!remove_d2h && copy_out.offload_node->is_free()) {
7✔
137
        if (copy_out.offload_node->is_d2h()) {
7✔
138
            // Only drop copy_out's fused free if it is the deallocation we decided to remove (same-container case keeps
139
            // the historical behaviour of always dropping it).
140
            if (!aliases || drop_free_via_copy_out) {
3✔
141
                copy_out.offload_node->remove_free();
3✔
142
            }
3✔
143
        } else {
4✔
144
            remove_entirely = true;
4✔
145
        }
4✔
146
    } else if (remove_d2h) {
7✔
147
        remove_entirely = true;
×
148
    }
×
149
    if (remove_entirely) {
7✔
150
        auto* copy_out_block =
4✔
151
            dynamic_cast<structured_control_flow::Block*>(copy_out.offload_node->get_parent().get_parent());
4✔
152
        builder.clear_code_node_legacy(*copy_out_block, *copy_out.offload_node);
4✔
153
    }
4✔
154

155
    auto* copy_in_block = dynamic_cast<structured_control_flow::Block*>(copy_in.offload_node->get_parent().get_parent()
7✔
156
    );
7✔
157
    builder.clear_code_node_legacy(*copy_in_block, *copy_in.offload_node);
7✔
158

159
    // Maps the device pointers if necessary
160
    if (aliases) {
7✔
161
        auto& container_type = builder.subject().type(copy_out_device_container);
×
162
        auto ref_type = container_type.clone();
×
163
        auto& in_access = builder.add_access(*copy_in_block, copy_out_device_container, copy_out_src_debinfo);
×
164
        auto& out_access = builder.add_access(*copy_in_block, copy_in_device_container, copy_in_dst_debinfo);
×
165
        builder.add_reference_memlet(
×
166
            *copy_in_block,
×
167
            in_access,
×
168
            out_access,
×
169
            {symbolic::zero()},
×
170
            *ref_type,
×
171
            DebugInfo::merge(copy_out_off_debinfo, copy_in_off_debinfo)
×
172
        );
×
173

174
        // Drop the dominated device free of the now-shared storage (unless it was copy_out's fused free, already
175
        // stripped above).
NEW
176
        if (free_to_drop != nullptr && !drop_free_via_copy_out) {
×
NEW
177
            builder.clear_code_node_legacy(*free_to_drop->block, *free_to_drop->node);
×
NEW
178
        }
×
UNCOV
179
    }
×
180

181
    return true;
7✔
182
}
7✔
183

184
bool DataTransferMinimizationPass::
185
    run_pass(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
8✔
186
    analysis::DataTransferEliminationAnalysis transfer_analysis(builder.subject(), analysis_manager);
8✔
187
    transfer_analysis.run();
8✔
188

189
    auto& cf_analysis = analysis_manager.get<analysis::ControlFlowAnalysis>();
8✔
190

191
    int removed = 0;
8✔
192

193
    for (auto& [malloc_cand, first_h2d] : transfer_analysis.empty_malloc_candidates()) {
8✔
194
        auto& malloc_holder = *malloc_cand.offload;
3✔
195

196
        DEBUG_PRINTLN(
3✔
197
            "  Elim malloc: " << "#" << malloc_holder.malloc_node->element_id() << " -> "
3✔
198
                              << (malloc_holder.host_data ? malloc_holder.host_data->data() : "-") << " / "
3✔
199
                              << "h2d+malloc: #" << first_h2d.offload_node->element_id() << " "
3✔
200
                              << (first_h2d.host_data ? first_h2d.host_data->data() : "-") << " -> "
3✔
201
                              << first_h2d.dev_data->data()
3✔
202
        );
3✔
203

204
        bool success = eliminate_malloc_first_transfer(builder, malloc_holder, first_h2d);
3✔
205

206
        if (success) {
3✔
207
            ++removed;
3✔
208
        }
3✔
209
    }
3✔
210

211
    for (auto& [h2d_cand, redundant_d2h] : transfer_analysis.redundant_d2h_candidates()) {
8✔
212
        auto& h2d_holder = *h2d_cand.offload;
6✔
213

214
        if (h2d_holder.updates_on_dev) {
6✔
215
            DEBUG_PRINTLN(
4✔
216
                "  Elim h2d: " << "#" << h2d_holder.offload_node->element_id() << " -> " << h2d_holder.host_data->data()
4✔
217
                               << " -> " << h2d_holder.dev_data->data() << " / "
4✔
218
                               << "clean d2h: #" << redundant_d2h.offload_node->element_id() << " "
4✔
219
                               << redundant_d2h.dev_data->data() << " -> " << redundant_d2h.host_data->data()
4✔
220
            );
4✔
221
            bool success = eliminate_redundant_d2h(builder, h2d_holder, redundant_d2h);
4✔
222

223
            if (success) {
4✔
224
                ++removed;
4✔
225
            }
4✔
226
        }
4✔
227
    }
6✔
228

229
    for (auto& candidate : transfer_analysis.transfer_reuse_candidates()) {
8✔
230
        auto reads = candidate.first.read_count;
7✔
231
        auto& copy_out = *candidate.first.offload;
7✔
232
        auto& copy_in = candidate.second;
7✔
233
        auto& copy_in_container = copy_in.host_data->data();
7✔
234

235
        DEBUG_PRINTLN(
7✔
236
            "  Elim hd2: #" << copy_out.offload_node->element_id() << " " << copy_out.dev_data->data() << " -> "
7✔
237
                            << (copy_out.host_data ? copy_out.host_data->data() : "-") << " / "
7✔
238
                            << "d2h: #" << copy_in.offload_node->element_id() << " "
7✔
239
                            << (copy_in.host_data ? copy_in.host_data->data() : "-") << " -> "
7✔
240
                            << copy_in.dev_data->data()
7✔
241
        );
7✔
242

243
        bool success = eliminate_transfer_pair(builder, copy_out, copy_in, transfer_analysis, cf_analysis, false);
7✔
244

245
        if (success) {
7✔
246
            ++removed;
7✔
247
        }
7✔
248
    }
7✔
249

250
    return removed > 0;
8✔
251
}
8✔
252

253
// Forwards the builder and the analysis manager to the non-stopping visitor base class.
DataTransferMinimizationLegacy::DataTransferMinimizationLegacy(
    builder::StructuredSDFGBuilder& builder,
    analysis::AnalysisManager& analysis_manager
)
    : visitor::NonStoppingStructuredSDFGVisitor(builder, analysis_manager) {
}
×
256

257
bool DataTransferMinimizationLegacy::visit() {
×
258
    DEBUG_PRINTLN("Running DataTransferMinimizationPass on " << this->builder_.subject().name());
×
259
    return visitor::NonStoppingStructuredSDFGVisitor::visit();
×
260
}
×
261

262
bool DataTransferMinimizationLegacy::accept(structured_control_flow::Sequence& sequence) {
×
263
    bool applied = false;
×
264
    offloading::DataOffloadingNode* copy_out = nullptr;
×
265
    structured_control_flow::Block* copy_out_block = nullptr;
×
266
    size_t copy_out_index = 0;
×
267

268
    // While a copy-out can be found:
269
    while (copy_out_index < sequence.size()) {
×
270
        // Find a new copy-out
271
        for (; copy_out_index < sequence.size(); copy_out_index++) {
×
272
            if (auto* block = dynamic_cast<structured_control_flow::Block*>(&sequence.at(copy_out_index).first)) {
×
273
                if (block->dataflow().library_nodes().size() == 1 && block->dataflow().tasklets().size() == 0) {
×
274
                    auto* libnode = *block->dataflow().library_nodes().begin();
×
275
                    if (auto* offloading_node = dynamic_cast<offloading::DataOffloadingNode*>(libnode)) {
×
276
                        if (offloading_node->is_d2h()) {
×
277
                            copy_out = offloading_node;
×
278
                            copy_out_block = block;
×
279
                            break;
×
280
                        }
×
281
                    }
×
282
                }
×
283
            }
×
284
        }
×
285

286
        // Find a matching copy-in
287
        size_t i;
×
288
        for (i = copy_out_index; i < sequence.size(); i++) {
×
289
            // Child must be a block
290
            auto* copy_in_block = dynamic_cast<structured_control_flow::Block*>(&sequence.at(i).first);
×
291
            if (!copy_in_block) {
×
292
                continue;
×
293
            }
×
294

295
            // Block must contain exactly one library node
296
            if (copy_in_block->dataflow().library_nodes().size() != 1 ||
×
297
                copy_in_block->dataflow().tasklets().size() != 0) {
×
298
                continue;
×
299
            }
×
300

301
            // Library node must be an offloading node
302
            auto* copy_in =
×
303
                dynamic_cast<offloading::DataOffloadingNode*>(*copy_in_block->dataflow().library_nodes().begin());
×
304
            if (!copy_in) {
×
305
                continue;
×
306
            }
×
307

308
            // Offloading node must be a copy-in
309
            if (!copy_in->is_h2d()) {
×
310
                continue;
×
311
            }
×
312

313
            // Copy-in and copy-out must be redundant
314
            if (!copy_out->redundant_with(*copy_in)) {
×
315
                continue;
×
316
            }
×
317

318
            // Get src and dst access nodes for copy-in & -out
319
            auto [copy_out_src, copy_out_dst] = this->get_src_and_dst(copy_out_block->dataflow(), copy_out);
×
320
            auto [copy_in_src, copy_in_dst] = this->get_src_and_dst(copy_in_block->dataflow(), copy_in);
×
321

322
            // Get the write and read users
323
            auto& users = this->analysis_manager_.get<analysis::Users>();
×
324
            analysis::User* write = users.get_user(copy_out_dst->data(), copy_out_dst, analysis::Use::WRITE);
×
325
            if (!write) {
×
326
                continue;
×
327
            }
×
328
            analysis::User* read = users.get_user(copy_in_src->data(), copy_in_src, analysis::Use::READ);
×
329
            if (!read) {
×
330
                continue;
×
331
            }
×
332

333
            if (copy_out_dst->data() == copy_in_src->data()) {
×
334
                // Ensure that the container is not written between the data transfer nodes
335
                bool used_between = false;
×
336
                for (auto* user : users.all_uses_between(*write, *read)) {
×
337
                    if (user->container() == copy_out_dst->data() && user->use() != analysis::Use::READ) {
×
338
                        used_between = true;
×
339
                        break;
×
340
                    }
×
341
                }
×
342
                if (used_between) {
×
343
                    continue;
×
344
                }
×
345
            } else {
×
346
                if (!this->check_container_dependency(
×
347
                        copy_out_block, copy_out_dst->data(), copy_in_block, copy_in_src->data()
×
348
                    )) {
×
349
                    continue;
×
350
                }
×
351
            }
×
352

353
            // Check that the container is not written after the data transfer nodes
354
            bool read_after = false;
×
355
            for (auto* user : users.all_uses_after(*write)) {
×
356
                if (user->container() == copy_out_dst->data() && user->use() == analysis::Use::READ && user != read) {
×
357
                    read_after = true;
×
358
                    break;
×
359
                }
×
360
            }
×
361

362
            // Debug output
363
            DEBUG_PRINTLN(
×
364
                "  Eliminating " << (read_after ? "(" : "") << "copy-out: #" << copy_out->element_id() << " "
×
365
                                 << copy_out_src->data() << " -> " << copy_out_dst->data() << (read_after ? ")" : "")
×
366
                                 << " / copy-in: #" << copy_in->element_id() << " " << copy_in_src->data() << " -> "
×
367
                                 << copy_in_dst->data()
×
368
            );
×
369

370
            // Get all relevant information
371
            std::string copy_out_device_container = copy_out_src->data();
×
372
            std::string copy_in_device_container = copy_in_dst->data();
×
373
            DebugInfo copy_out_src_debinfo = copy_out_src->debug_info();
×
374
            DebugInfo copy_in_dst_debinfo = copy_in_dst->debug_info();
×
375

376
            // Remove the data tranfers
377
            if (read_after && copy_out->is_free()) {
×
378
                copy_out->remove_free();
×
379
            } else if (!read_after) {
×
380
                this->builder_.clear_code_node_legacy(*copy_out_block, *copy_out);
×
381
            }
×
382
            this->builder_.clear_code_node_legacy(*copy_in_block, *copy_in);
×
383

384
            // Maps the device pointers if necessary
385
            if (copy_out_device_container != copy_in_device_container) {
×
386
                auto& container_type = this->builder_.subject().type(copy_out_device_container);
×
387
                auto ref_type = container_type.clone();
×
388
                auto& in_access =
×
389
                    this->builder_.add_access(*copy_in_block, copy_out_device_container, copy_out_src_debinfo);
×
390
                auto& out_access =
×
391
                    this->builder_.add_access(*copy_in_block, copy_in_device_container, copy_in_dst_debinfo);
×
392
                this->builder_.add_reference_memlet(
×
393
                    *copy_in_block,
×
394
                    in_access,
×
395
                    out_access,
×
396
                    {symbolic::zero()},
×
397
                    *ref_type,
×
398
                    DebugInfo::merge(copy_out->debug_info(), copy_in->debug_info())
×
399
                );
×
400
            }
×
401

402
            // Invalidate users analysis
403
            this->analysis_manager_.invalidate<analysis::Users>();
×
404
            applied = true;
×
405
            break;
×
406
        }
×
407

408
        // Skip if no matching copy-in was found
409
        if (i >= sequence.size()) {
×
410
            copy_out_index++;
×
411
        }
×
412
    }
×
413

414
    return applied;
×
415
}
×
416

417
std::pair<data_flow::AccessNode*, data_flow::AccessNode*> DataTransferMinimizationLegacy::
418
    get_src_and_dst(data_flow::DataFlowGraph& dfg, offloading::DataOffloadingNode* offloading_node) {
×
419
    if (!offloading_node->has_transfer()) {
×
420
        throw InvalidSDFGException(
×
421
            "DataTransferMinimization: Cannot get copy access nodes for offloading node without data transfers"
×
422
        );
×
423
    }
×
424
    data_flow::AccessNode *src, *dst;
×
425
    if (dynamic_cast<cuda::CUDADataOffloadingNode*>(offloading_node)) {
×
426
        src = this->get_in_access(offloading_node, "_src");
×
427
        dst = this->get_out_access(offloading_node, "_dst");
×
428
    } else if (dynamic_cast<rocm::ROCMDataOffloadingNode*>(offloading_node)) {
×
429
        src = this->get_in_access(offloading_node, "_src");
×
430
        dst = this->get_out_access(offloading_node, "_dst");
×
431
    } else {
×
432
        throw InvalidSDFGException(
×
433
            "DataTransferMinimization: Unknown offloading node encountered: " + offloading_node->code().value()
×
434
        );
×
435
    }
×
436
    return {src, dst};
×
437
}
×
438

439
// Returns the access node feeding the input connector `dst_conn` of `node`.
//
// The first incoming edge whose destination connector matches decides the result: its source is returned if it is
// an access node, nullptr otherwise. nullptr is also returned when no incoming edge uses that connector.
data_flow::AccessNode* DataTransferMinimizationLegacy::
    get_in_access(data_flow::CodeNode* node, const std::string& dst_conn) {
    auto& graph = node->get_parent();
    for (auto& edge : graph.in_edges(*node)) {
        if (edge.dst_conn() != dst_conn) {
            continue;
        }
        return dynamic_cast<data_flow::AccessNode*>(&edge.src());
    }
    return nullptr;
}
×
449

450
// Returns the access node fed by the output connector `src_conn` of `node`.
//
// The first outgoing edge whose source connector matches decides the result: its destination is returned if it is
// an access node, nullptr otherwise. nullptr is also returned when no outgoing edge uses that connector.
data_flow::AccessNode* DataTransferMinimizationLegacy::
    get_out_access(data_flow::CodeNode* node, const std::string& src_conn) {
    auto& dfg = node->get_parent();
    for (auto& oedge : dfg.out_edges(*node)) {
        if (oedge.src_conn() == src_conn) {
            // Checked cast, matching get_in_access: a destination of another node kind yields nullptr instead of
            // a pointer of the wrong type.
            return dynamic_cast<data_flow::AccessNode*>(&oedge.dst());
        }
    }
    return nullptr;
}
×
460

461
bool DataTransferMinimizationLegacy::check_container_dependency(
462
    structured_control_flow::Block* copy_out_block,
463
    const std::string& copy_out_container,
464
    structured_control_flow::Block* copy_in_block,
465
    const std::string& copy_in_container
466
) {
×
467
    // Simplification: Assume blocks are in the same sequence
468
    auto* copy_out_block_parent = copy_out_block->get_parent();
×
469
    auto* copy_in_block_parent = copy_in_block->get_parent();
×
470
    auto* sequence = dynamic_cast<structured_control_flow::Sequence*>(copy_out_block_parent);
×
471
    if (copy_out_block_parent != copy_in_block_parent || !sequence) {
×
472
        return false;
×
473
    }
×
474

475
    std::unordered_set<std::string> copy_out_container_captures, copy_in_container_parts;
×
476
    size_t start = sequence->index(*copy_out_block);
×
477
    size_t stop = sequence->index(*copy_in_block);
×
478
    for (size_t i = start + 1; i < stop; i++) {
×
479
        auto* block = dynamic_cast<structured_control_flow::Block*>(&sequence->at(i).first);
×
480
        if (!block) {
×
481
            continue;
×
482
        }
×
483

484
        auto& dfg = block->dataflow();
×
485
        for (auto* access_node : dfg.data_nodes()) {
×
486
            if (access_node->data() == copy_in_container) {
×
487
                // Only allow constant assignments
488
                for (auto& iedge : dfg.in_edges(*access_node)) {
×
489
                    auto* tasklet = dynamic_cast<data_flow::Tasklet*>(&iedge.src());
×
490
                    if (!tasklet || tasklet->code() != data_flow::TaskletCode::assign) {
×
491
                        continue;
×
492
                    }
×
493

494
                    auto& iedge2 = *dfg.in_edges(*tasklet).begin();
×
495
                    if (!dynamic_cast<data_flow::ConstantNode*>(&iedge2.src())) {
×
496
                        return false;
×
497
                    }
×
498
                }
×
499

500
                // Collect H2D container parts
501
                for (auto& oedge : dfg.out_edges(*access_node)) {
×
502
                    if (oedge.type() != data_flow::MemletType::Reference) {
×
503
                        continue;
×
504
                    }
×
505

506
                    auto* access_node2 = dynamic_cast<data_flow::AccessNode*>(&oedge.dst());
×
507
                    if (!access_node2) {
×
508
                        continue;
×
509
                    }
×
510

511
                    copy_in_container_parts.insert(access_node2->data());
×
512
                }
×
513
            } else if (access_node->data() == copy_out_container) {
×
514
                // Collect D2H container captures
515
                for (auto& oedge : dfg.out_edges(*access_node)) {
×
516
                    if (oedge.type() != data_flow::MemletType::Dereference_Dst) {
×
517
                        continue;
×
518
                    }
×
519

520
                    auto* access_node2 = dynamic_cast<data_flow::AccessNode*>(&oedge.dst());
×
521
                    if (!access_node2) {
×
522
                        continue;
×
523
                    }
×
524

525
                    copy_out_container_captures.insert(access_node2->data());
×
526
                }
×
527
            }
×
528
        }
×
529
    }
×
530

531
    // Find all matches between captures and parts
532
    size_t matches = 0;
×
533
    for (auto& capture : copy_out_container_captures) {
×
534
        for (auto& part : copy_in_container_parts) {
×
535
            if (capture == part) {
×
536
                matches++;
×
537
            }
×
538
        }
×
539
    }
×
540

541
    return (matches == 1);
×
542
}
×
543

544
} // namespace passes
545
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc