• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28302975093

27 Jun 2026 09:55PM UTC coverage: 61.929% (+0.2%) from 61.754%
28302975093

Pull #814

github

web-flow
Merge d9618e50c into 8322f5994
Pull Request #814: Adds GPU reduce dispatchers

567 of 854 new or added lines in 16 files covered. (66.39%)

17 existing lines in 1 file now uncovered.

39450 of 63702 relevant lines covered (61.93%)

968.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

68.85
/opt/src/transformations/offloading/offload_transform.cpp
1
#include "sdfg/transformations/offloading/offload_transform.h"
2

3
#include <map>
4
#include <string>
5

6
#include "sdfg/analysis/type_analysis.h"
7
#include "sdfg/data_flow/access_node.h"
8
#include "sdfg/structured_control_flow/block.h"
9
#include "sdfg/structured_control_flow/if_else.h"
10
#include "sdfg/structured_control_flow/map.h"
11
#include "sdfg/symbolic/symbolic.h"
12

13
#include "sdfg/data_flow/library_node.h"
14
#include "sdfg/optimization_report/pass_report_consumer.h"
15
#include "sdfg/types/utils.h"
16
#include "sdfg/visitor/immutable_structured_sdfg_visitor.h"
17
#include "symengine/symengine_rcp.h"
18

19
namespace sdfg {
20
namespace transformations {
21

22
class SideEffectFinder : public visitor::ImmutableStructuredSDFGVisitor {
23
private:
24
    structured_control_flow::StructuredLoop& loop_;
25

26
public:
27
    SideEffectFinder(
28
        StructuredSDFG& sdfg, analysis::AnalysisManager& analysis_manager, structured_control_flow::StructuredLoop& loop
29
    )
30
        : visitor::ImmutableStructuredSDFGVisitor(sdfg, analysis_manager), loop_(loop) {}
20✔
31

32
    bool visit() override { return visit_internal(loop_.root()); }
20✔
33

34
    bool accept(structured_control_flow::Block& node) override {
21✔
35
        for (const auto& lib_node : node.dataflow().library_nodes()) {
21✔
36
            if (lib_node->side_effect()) {
×
37
                return true;
×
38
            }
×
39
        }
×
40
        return false;
21✔
41
    }
21✔
42
};
43

44
OffloadTransform::OffloadTransform(structured_control_flow::StructuredLoop& loop, bool allow_dynamic_sizes)
45
    : loop_(loop), allow_dynamic_sizes_(allow_dynamic_sizes) {}
38✔
46

47

48
/// Decides whether the wrapped loop can be offloaded.
///
/// The checks run in this order and the first failing one ends the query:
///   1. the loop is a Map or a Reduce (otherwise an exception is thrown),
///   2. the types of all region arguments were inferred,
///   3. every argument has an inferable, contiguous outer type and is not a
///      scalar that is written by the loop,
///   4. the number of iterations can be determined,
///   5. the argument sizes are known (honouring `allow_dynamic_sizes_`),
///   6. no block inside the loop contains a library node with side effects.
///
/// When a report consumer is attached (`report_`), every outcome is reported
/// to it with a short reason string.
///
/// @param builder           Builder whose subject SDFG contains the loop.
/// @param analysis_manager  Source of the ArgumentsAnalysis used for the checks.
/// @return `true` if all criteria hold, `false` otherwise.
/// @throws InvalidTransformationDescriptionException if the loop is neither a
///         Map nor a Reduce.
bool OffloadTransform::can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    if (dynamic_cast<structured_control_flow::Map*>(&loop_) == nullptr &&
        dynamic_cast<structured_control_flow::Reduce*>(&loop_) == nullptr) {
        throw InvalidTransformationDescriptionException("OffloadTransform: can only offload Map or Reduce nodes.");
    }

    auto& sdfg = builder.subject();

    auto& arguments_analysis = analysis_manager.get<analysis::ArgumentsAnalysis>();

    if (!arguments_analysis.inferred_types(analysis_manager, this->loop_)) {
        if (report_) report_->transform_impossible(this, "unranged args");
        DEBUG_PRINTLN("Cannot apply transform: argument types not inferred");
        return false;
    }
    auto& arguments = arguments_analysis.arguments(analysis_manager, this->loop_);

    // The type analysis does not depend on the argument being inspected, so it
    // is built once instead of once per argument. Keeping it alive for the
    // whole loop also means the pointer returned by get_outer_type() is never
    // dereferenced after the analysis that produced it has been destroyed.
    analysis::TypeAnalysis type_analysis(sdfg, &loop_, analysis_manager);

    for (auto& [argument, meta] : arguments) {
        // Criterion: arg Data Types must be continuous
        auto base_type = type_analysis.get_outer_type(argument);
        if (base_type == nullptr) {
            if (report_) report_->transform_impossible(this, "cannot infer type");
            DEBUG_PRINTLN("Cannot apply transform: argument type cannot be inferred");
            return false;
        }
        if (!types::is_contiguous_type(*base_type, sdfg)) {
            if (report_) report_->transform_impossible(this, "type is not contiguous");
            DEBUG_PRINTLN("Cannot apply transform: argument type is not contiguous");
            return false;
        }
        // Criterion: Map cannot write to scalar arguments
        if (meta.is_scalar && meta.is_output) {
            if (report_) report_->transform_impossible(this, "scalar output");
            DEBUG_PRINTLN("Cannot apply transform: map writes to scalar argument");
            return false;
        }
    }

    // Note: arbitrary `init` and `stride` are permitted on the kernel-boundary
    // Map. The CUDA/ROCm dispatchers emit
    //   `<map.indvar> = init + thread_flat_id * stride`,
    // and `num_iterations()` already accounts for both when computing the grid
    // geometry.
    if (loop_.num_iterations().is_null()) {
        if (report_) report_->transform_impossible(this, "cannot determine num iterations");
        DEBUG_PRINTLN("Cannot apply transform: cannot determine number of iterations for map");
        return false;
    }

    // A second pass over `arguments` repeating the scalar-output check used to
    // live here. It could never fire: the loop above has already returned for
    // any argument with `is_scalar && is_output`.

    if (!arguments_analysis.argument_size_known(analysis_manager, this->loop_, allow_dynamic_sizes_)) {
        if (report_) report_->transform_impossible(this, "args not understood");
        DEBUG_PRINTLN("Cannot apply transform: argument sizes not known");
        return false;
    }

    // Criterion: Map cannot contain function calls with side effects (e.g. library nodes that write to memory)
    SideEffectFinder side_effect_finder(sdfg, analysis_manager, this->loop_);
    if (side_effect_finder.visit()) {
        if (report_) report_->transform_impossible(this, "side effects");
        DEBUG_PRINTLN("Cannot apply transform: map contains library nodes with side effects");
        return false;
    }

    if (report_) report_->transform_possible(this);
    return true;
}
20✔
122

123
/// Offloads the wrapped loop.
///
/// Steps, in order:
///   1. switch the loop to `transformed_schedule_type()`,
///   2. query arguments, locals and argument sizes from ArgumentsAnalysis,
///   3. allocate locals and set up device buffers,
///   4. for every pointer argument, add a block before the loop that copies it
///      to the device (with allocation),
///   5. rename the pointer arguments inside the loop to their device names,
///   6. for every pointer argument, add a block after the loop that either
///      copies it back and frees it, or only frees it when
///      `skip_unneeded_d2h_` is set and the argument is not an output.
///
/// Non-pointer arguments are left untouched by steps 4-6.
///
/// @param builder           Builder used to mutate the SDFG.
/// @param analysis_manager  Source of the ArgumentsAnalysis.
void OffloadTransform::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    // Schedule
    builder.update_schedule_type(this->loop_, transformed_schedule_type());

    // Identify arguments and locals
    auto& arguments_analysis = analysis_manager.get<analysis::ArgumentsAnalysis>();

    auto& arguments = arguments_analysis.arguments(analysis_manager, this->loop_);
    auto& locals = arguments_analysis.locals(analysis_manager, this->loop_);

    // Infer subsets for arguments
    auto& argument_sizes = arguments_analysis.argument_sizes(analysis_manager, this->loop_, allow_dynamic_sizes_);

    // NOTE(review): the parent is assumed to be a Sequence; the cast is unchecked.
    auto parent_scope = static_cast<structured_control_flow::Sequence*>(this->loop_.get_parent());

    // Key the device-buffer names by THIS map's element id so every offloaded loop nest gets its own SSA device
    // buffer. Keying by the parent scope instead would make two maps under the same sequence that offload the same
    // host container resolve to one device name, allocated and freed once per map -- a single device name carrying
    // multiple alloc/free lifetimes. DataTransferMinimization's reuse reconciliation bails on exactly that
    // (a device buffer with more than one free cannot be proven double-free safe), which blocks the D2H->H2D
    // device aliasing and leaves a redundant host round-trip. One buffer per loop nest keeps each name single-alloc
    // single-free, so the reuse fires and DeadDataElimination can drop the now-userless staging container.
    std::string container_prefix = copy_prefix() + std::to_string(this->loop_.element_id()) + "_";

    // Allocate arguments and locals
    allocate_locals_on_device_stack(builder, analysis_manager, locals);
    handle_device_setup_and_teardown(builder, arguments, argument_sizes, container_prefix);

    // Copy-in arguments to device memory & allocation
    for (auto& [argument, meta] : arguments) {
        if (!meta.is_ptr) {
            continue;
        }
        auto argument_device = container_prefix + argument;
        auto& new_block = builder.add_block_before(*parent_scope, this->loop_, {}, this->loop_.debug_info());
        auto& size = argument_sizes.at(argument);
        copy_to_device_with_allocation(builder, argument, argument_device, size, SymEngine::null, new_block);
    }

    // From here on the loop body refers to the device copies.
    update_loop_containers(arguments, container_prefix);

    // Copy-out arguments to host memory & free
    for (auto& [argument, meta] : arguments) {
        if (!meta.is_ptr) {
            continue;
        }
        auto argument_device = container_prefix + argument;
        auto& new_block = builder.add_block_after(*parent_scope, this->loop_, {}, this->loop_.debug_info());
        auto& size = argument_sizes.at(argument);
        if (!skip_unneeded_d2h_ || meta.is_output) {
            copy_from_device_with_free(builder, new_block, argument, argument_device, size, SymEngine::null);
        } else {
            // Nothing to copy back: the argument is not an output, so only release the device buffer.
            deallocate_device_arg(builder, new_block, argument_device, size, SymEngine::null);
        }
    }

    if (report_) report_->transform_applied(this);
}
18✔
183

184
void OffloadTransform::handle_device_setup_and_teardown(
185
    builder::StructuredSDFGBuilder& builder,
186

187
    const std::map<std::string, analysis::RegionArgument>& arguments,
188
    const std::unordered_map<std::string, symbolic::Expression>& argument_sizes,
189
    std::string prefix
190
) {
18✔
191
    // Add managed buffers for pointer arguments
192
    for (auto& [argument, meta] : arguments) {
37✔
193
        if (!meta.is_ptr || builder.subject().exists(prefix + argument)) {
37✔
194
            continue;
18✔
195
        }
18✔
196
        auto argument_device = prefix + argument;
19✔
197

198
        auto arg_size = argument_sizes.at(argument);
19✔
199

200
        add_device_buffer(builder, argument, argument_device, arg_size);
19✔
201
    }
19✔
202
}
18✔
203

204
void OffloadTransform::
205
    update_loop_containers(const std::map<std::string, analysis::RegionArgument>& arguments, std::string prefix) {
18✔
206
    for (auto& [argument, meta] : arguments) {
37✔
207
        if (meta.is_ptr) {
37✔
208
            auto argument_device = prefix + argument;
19✔
209
            this->loop_.replace(symbolic::symbol(argument), symbolic::symbol(argument_device));
19✔
210
        }
19✔
211
    }
37✔
212
}
18✔
213

214
} // namespace transformations
215
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc