• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28303955550

27 Jun 2026 10:38PM UTC coverage: 61.924% (+0.2%) from 61.754%
28303955550

Pull #814

github

web-flow
Merge 89b94697f into 8322f5994
Pull Request #814: Adds GPU reduce dispatchers

568 of 859 new or added lines in 16 files covered. (66.12%)

3 existing lines in 1 file now uncovered.

39450 of 63707 relevant lines covered (61.92%)

967.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

66.93
/opt/src/transformations/offloading/offload_transform.cpp
1
#include "sdfg/transformations/offloading/offload_transform.h"
2

3
#include <map>
4
#include <string>
5

6
#include "sdfg/analysis/type_analysis.h"
7
#include "sdfg/data_flow/access_node.h"
8
#include "sdfg/structured_control_flow/block.h"
9
#include "sdfg/structured_control_flow/if_else.h"
10
#include "sdfg/structured_control_flow/map.h"
11
#include "sdfg/symbolic/symbolic.h"
12

13
#include "sdfg/data_flow/library_node.h"
14
#include "sdfg/optimization_report/pass_report_consumer.h"
15
#include "sdfg/types/utils.h"
16
#include "sdfg/visitor/immutable_structured_sdfg_visitor.h"
17
#include "symengine/symengine_rcp.h"
18

19
namespace sdfg {
20
namespace transformations {
21

22
class SideEffectFinder : public visitor::ImmutableStructuredSDFGVisitor {
23
private:
24
    structured_control_flow::StructuredLoop& loop_;
25

26
public:
27
    SideEffectFinder(
28
        StructuredSDFG& sdfg, analysis::AnalysisManager& analysis_manager, structured_control_flow::StructuredLoop& loop
29
    )
30
        : visitor::ImmutableStructuredSDFGVisitor(sdfg, analysis_manager), loop_(loop) {}
20✔
31

32
    bool visit() override { return visit_internal(loop_.root()); }
20✔
33

34
    bool accept(structured_control_flow::Block& node) override {
21✔
35
        for (const auto& lib_node : node.dataflow().library_nodes()) {
21✔
36
            if (lib_node->side_effect()) {
×
37
                return true;
×
38
            }
×
39
        }
×
40
        return false;
21✔
41
    }
21✔
42
};
43

44
/// Binds the transform to `loop` and stores `allow_dynamic_sizes`, which is
/// later forwarded to the ArgumentsAnalysis size queries.
OffloadTransform::OffloadTransform(structured_control_flow::StructuredLoop& loop, bool allow_dynamic_sizes)
    : loop_(loop),
      allow_dynamic_sizes_(allow_dynamic_sizes) {
}
38✔
46

47

48
/// Decides whether the loop held by this transform can be offloaded.
///
/// Every rejection is recorded through `report_` (when set) and logged with
/// DEBUG_PRINTLN before `false` is returned; acceptance is recorded with
/// `transform_possible`.
///
/// @param builder           Builder whose subject SDFG contains the loop.
/// @param analysis_manager  Provides the ArgumentsAnalysis used for all argument queries.
/// @return true only if none of the rejection criteria below applies.
/// @throws InvalidTransformationDescriptionException if the loop is neither a Map nor a Reduce.
bool OffloadTransform::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    // Classify the loop once; the result drives both the hard error and the Reduce rejection.
    const bool is_reduce = dynamic_cast<structured_control_flow::Reduce*>(&loop_) != nullptr;
    if (!is_reduce && dynamic_cast<structured_control_flow::Map*>(&loop_) == nullptr) {
        throw InvalidTransformationDescriptionException("OffloadTransform: can only offload Map or Reduce nodes.");
    }

    if (is_reduce) {
        if (report_) report_->transform_impossible(this, "reduce");
        DEBUG_PRINTLN("Cannot apply transform: Reduce nodes are not offloaded yet");
        return false;
    }

    auto& sdfg = builder.subject();

    auto& arguments_analysis = analysis_manager.get<analysis::ArgumentsAnalysis>();

    if (!arguments_analysis.inferred_types(analysis_manager, this->loop_)) {
        if (report_) report_->transform_impossible(this, "unranged args");
        DEBUG_PRINTLN("Cannot apply transform: argument types not inferred");
        return false;
    }
    auto& arguments = arguments_analysis.arguments(analysis_manager, this->loop_);

    // The type analysis depends only on the SDFG and the loop, so it is built
    // once and queried for every argument.
    analysis::TypeAnalysis type_analysis(sdfg, &loop_, analysis_manager);

    // Criteria: every argument must have an inferable, contiguous type, and the
    // map must not write to a scalar argument.
    for (const auto& [argument, meta] : arguments) {
        auto base_type = type_analysis.get_outer_type(argument);
        if (base_type == nullptr) {
            if (report_) report_->transform_impossible(this, "cannot infer type");
            DEBUG_PRINTLN("Cannot apply transform: argument type cannot be inferred");
            return false;
        }
        if (!types::is_contiguous_type(*base_type, sdfg)) {
            if (report_) report_->transform_impossible(this, "type is not contiguous");
            DEBUG_PRINTLN("Cannot apply transform: argument type is not contiguous");
            return false;
        }
        // This loop visits every argument, so no separate scalar-output pass is needed.
        if (meta.is_scalar && meta.is_output) {
            if (report_) report_->transform_impossible(this, "scalar output");
            DEBUG_PRINTLN("Cannot apply transform: map writes to scalar argument");
            return false;
        }
    }

    // Note: arbitrary `init` and `stride` are permitted on the kernel-boundary
    // Map. The CUDA/ROCm dispatchers emit
    //   `<map.indvar> = init + thread_flat_id * stride`,
    // and `num_iterations()` already accounts for both when computing the grid
    // geometry.
    if (loop_.num_iterations().is_null()) {
        if (report_) report_->transform_impossible(this, "cannot determine num iterations");
        DEBUG_PRINTLN("Cannot apply transform: cannot determine number of iterations for map");
        return false;
    }

    if (!arguments_analysis.argument_size_known(analysis_manager, this->loop_, allow_dynamic_sizes_)) {
        if (report_) report_->transform_impossible(this, "args not understood");
        DEBUG_PRINTLN("Cannot apply transform: argument sizes not known");
        return false;
    }

    // Criterion: Map cannot contain function calls with side effects (e.g. library nodes that write to memory)
    SideEffectFinder side_effect_finder(sdfg, analysis_manager, this->loop_);
    if (side_effect_finder.visit()) {
        if (report_) report_->transform_impossible(this, "side effects");
        DEBUG_PRINTLN("Cannot apply transform: map contains library nodes with side effects");
        return false;
    }

    if (report_) report_->transform_possible(this);
    return true;
}
20✔
128

129
/// Rewrites the loop for offloading: switches its schedule type, sets up device
/// storage, emits a copy-in block before and a copy-out/free block after the
/// loop for each pointer argument, and redirects the loop to the device
/// containers.
///
/// @param builder           Builder used for all modifications of the subject SDFG.
/// @param analysis_manager  Provides the ArgumentsAnalysis results for the loop.
void OffloadTransform::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    // Schedule
    builder.update_schedule_type(this->loop_, transformed_schedule_type());

    auto& sdfg = builder.subject();

    // Arguments, locals and per-argument sizes of the loop region
    auto& arguments_analysis = analysis_manager.get<analysis::ArgumentsAnalysis>();
    auto& arguments = arguments_analysis.arguments(analysis_manager, this->loop_);
    auto& locals = arguments_analysis.locals(analysis_manager, this->loop_);
    auto& argument_sizes = arguments_analysis.argument_sizes(analysis_manager, this->loop_, allow_dynamic_sizes_);

    auto parent_scope = static_cast<structured_control_flow::Sequence*>(this->loop_.get_parent());

    // Device-buffer names are derived from THIS loop's element id, giving each offloaded loop nest its own
    // device buffer. With a prefix derived from the parent scope, two maps in one sequence that offload the same
    // host container would share a single device name that is allocated and freed once per map. A device name
    // with several alloc/free lifetimes makes DataTransferMinimization's reuse reconciliation give up (more than
    // one free cannot be proven double-free safe), so the D2H->H2D device aliasing is blocked and a redundant
    // host round-trip remains. A buffer per loop nest keeps every name single-alloc single-free, which lets the
    // reuse fire and DeadDataElimination remove the then-unused staging container.
    std::string container_prefix = copy_prefix() + std::to_string(this->loop_.element_id()) + "_";

    // Device-side storage for locals and arguments
    allocate_locals_on_device_stack(builder, analysis_manager, locals);
    handle_device_setup_and_teardown(builder, arguments, argument_sizes, container_prefix);

    // Host-to-device: one block per pointer argument, inserted before the loop
    for (auto& [host_name, info] : arguments) {
        if (info.is_ptr) {
            auto device_name = container_prefix + host_name;
            auto& copy_in_block = builder.add_block_before(*parent_scope, this->loop_, {}, this->loop_.debug_info());
            auto& byte_size = argument_sizes.at(host_name);
            copy_to_device_with_allocation(builder, host_name, device_name, byte_size, SymEngine::null, copy_in_block);
        }
    }

    update_loop_containers(arguments, container_prefix);

    // Device-to-host: one block per pointer argument, inserted after the loop
    for (auto& [host_name, info] : arguments) {
        if (info.is_ptr) {
            auto device_name = container_prefix + host_name;
            auto& copy_out_block = builder.add_block_after(*parent_scope, this->loop_, {}, this->loop_.debug_info());
            auto& byte_size = argument_sizes.at(host_name);

            // Inputs are only freed, without a copy back, when skipping unneeded D2H transfers is enabled.
            const bool copy_back = info.is_output || !skip_unneeded_d2h_;
            if (copy_back) {
                copy_from_device_with_free(builder, copy_out_block, host_name, device_name, byte_size, SymEngine::null);
            } else {
                deallocate_device_arg(builder, copy_out_block, device_name, byte_size, SymEngine::null);
            }
        }
    }

    if (report_) report_->transform_applied(this);
}
18✔
189

190
void OffloadTransform::handle_device_setup_and_teardown(
191
    builder::StructuredSDFGBuilder& builder,
192

193
    const std::map<std::string, analysis::RegionArgument>& arguments,
194
    const std::unordered_map<std::string, symbolic::Expression>& argument_sizes,
195
    std::string prefix
196
) {
18✔
197
    // Add managed buffers for pointer arguments
198
    for (auto& [argument, meta] : arguments) {
37✔
199
        if (!meta.is_ptr || builder.subject().exists(prefix + argument)) {
37✔
200
            continue;
18✔
201
        }
18✔
202
        auto argument_device = prefix + argument;
19✔
203

204
        auto arg_size = argument_sizes.at(argument);
19✔
205

206
        add_device_buffer(builder, argument, argument_device, arg_size);
19✔
207
    }
19✔
208
}
18✔
209

210
void OffloadTransform::
211
    update_loop_containers(const std::map<std::string, analysis::RegionArgument>& arguments, std::string prefix) {
18✔
212
    for (auto& [argument, meta] : arguments) {
37✔
213
        if (meta.is_ptr) {
37✔
214
            auto argument_device = prefix + argument;
19✔
215
            this->loop_.replace(symbolic::symbol(argument), symbolic::symbol(argument_device));
19✔
216
        }
19✔
217
    }
37✔
218
}
18✔
219

220
} // namespace transformations
221
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc