• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28303955550

27 Jun 2026 10:38PM UTC coverage: 61.924% (+0.2%) from 61.754%
28303955550

Pull #814

github

web-flow
Merge 89b94697f into 8322f5994
Pull Request #814: Adds GPU reduce dispatchers

568 of 859 new or added lines in 16 files covered. (66.12%)

3 existing lines in 1 file now uncovered.

39450 of 63707 relevant lines covered (61.92%)

967.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.22
/opt/src/transformations/offloading/cuda_parallelize_nested_map.cpp
1
#include "sdfg/transformations/offloading/cuda_parallelize_nested_map.h"
2

3
#include <sdfg/analysis/loop_analysis.h>
4
#include "sdfg/exceptions.h"
5
#include "sdfg/structured_control_flow/reduce.h"
6
#include "sdfg/symbolic/symbolic.h"
7
#include "sdfg/targets/cuda/cuda.h"
8
#include "sdfg/types/pointer.h"
9
#include "sdfg/types/scalar.h"
10

11
namespace sdfg {
12
namespace transformations {
13

14
/// Binds the transformation to the loop it should offload and remembers the
/// requested number of CUDA threads per block for the new grid dimension.
CUDAParallelizeNestedMap::CUDAParallelizeNestedMap(
    structured_control_flow::StructuredLoop& loop,
    size_t block_size
)
    : loop_(loop),
      block_size_(block_size) {
}
16✔
16

17
/// Identifier of this transformation; `to_json` stores it under
/// "transformation_type".
std::string CUDAParallelizeNestedMap::name() const {
    return "CUDAParallelizeNestedMap";
}
3✔
18

19
bool CUDAParallelizeNestedMap::
20
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
12✔
21
    if (dynamic_cast<structured_control_flow::Map*>(&loop_) == nullptr &&
12✔
22
        dynamic_cast<structured_control_flow::Reduce*>(&loop_) == nullptr) {
12✔
NEW
23
        throw InvalidTransformationDescriptionException(
×
NEW
24
            "CUDAParallelizeNestedMap: can only parallelize Map or Reduce nodes."
×
NEW
25
        );
×
NEW
26
    }
×
27

28
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();
12✔
29

30
    // Condition: Check if map is not yet parallelized with CUDA
31
    if (loop_.schedule_type().value() != ScheduleType_Sequential::value()) {
12✔
32
        return false;
1✔
33
    }
1✔
34

35
    // Condition: a nested Reduce can only be offloaded when every accumulator is
36
    // a device-resident pointer to a scalar.
37
    if (auto* reduce = dynamic_cast<structured_control_flow::Reduce*>(&loop_)) {
11✔
NEW
38
        auto& sdfg = builder.subject();
×
NEW
39
        for (auto& reduction : reduce->reductions()) {
×
NEW
40
            auto& type = sdfg.type(reduction.container);
×
NEW
41
            auto* ptr = dynamic_cast<const types::Pointer*>(&type);
×
NEW
42
            if (ptr == nullptr || !ptr->has_pointee_type() ||
×
NEW
43
                dynamic_cast<const types::Scalar*>(&ptr->pointee_type()) == nullptr) {
×
NEW
44
                return false;
×
NEW
45
            }
×
NEW
46
        }
×
NEW
47
    }
×
48

49
    // Condition: Check if parent loop exists
50
    auto parent = loop_analysis.parent_loop(&loop_);
11✔
51
    if (parent == nullptr) {
11✔
52
        return false;
1✔
53
    }
1✔
54

55
    // Condition: Check if parent loop is a CUDA map, and not Z dimension (final dimension)
56
    if (auto map = dynamic_cast<structured_control_flow::Map*>(parent)) {
10✔
57
        if (map->schedule_type().value() != cuda::ScheduleType_CUDA::value()) {
10✔
58
            return false;
1✔
59
        }
1✔
60
        if (cuda::ScheduleType_CUDA::dimension(map->schedule_type()) == cuda::CUDADimension::Z) {
9✔
61
            return false;
1✔
62
        }
1✔
63
        auto parent_indvar = map->indvar();
8✔
64
        auto ancestor = parent;
8✔
65
        while (ancestor) {
18✔
66
            if (auto map_ancestor = dynamic_cast<structured_control_flow::Map*>(ancestor)) {
10✔
67
                parent_indvar = map_ancestor->indvar();
9✔
68
                for (auto& arg : symbolic::atoms(loop_.condition())) {
11✔
69
                    if (symbolic::eq(arg, parent_indvar)) {
11✔
70
                        return false;
×
71
                    }
×
72
                }
11✔
73
            }
9✔
74
            ancestor = loop_analysis.parent_loop(ancestor);
10✔
75
        }
10✔
76
    } else {
8✔
77
        return false;
×
78
    }
×
79

80
    // Note: arbitrary `init` and `stride` are permitted. The CUDA dispatcher
81
    // emits `<map.indvar> = init + thread_flat_id * stride`, so the body sees
82
    // the natural strided value; `num_iterations()` accounts for both when
83
    // computing the grid geometry.
84

85
    // Condition: Resulting CUDA grid dimension must not exceed hardware limits.
86
    // Y and Z grid dimensions are limited to 65535.
87
    auto num_iters = loop_.num_iterations();
8✔
88
    if (!num_iters.is_null() && SymEngine::is_a<SymEngine::Integer>(*num_iters)) {
8✔
89
        int64_t iters = SymEngine::down_cast<const SymEngine::Integer&>(*num_iters).as_int();
6✔
90
        int64_t block = static_cast<int64_t>(block_size_);
6✔
91
        int64_t grid_size = (iters + block - 1) / block;
6✔
92

93
        constexpr int64_t max_grid_dim_yz = 65535;
6✔
94
        if (grid_size > max_grid_dim_yz) {
6✔
95
            return false;
1✔
96
        }
1✔
97
    }
6✔
98

99
    return true;
7✔
100
}
8✔
101

102
/**
 * @brief Re-schedules `loop_` onto the CUDA grid dimension following its parent's.
 *
 * A parent on X places the loop on Y; a parent on Y places it on Z. The new
 * schedule carries `block_size_` as its block size.
 *
 * @param builder          Builder used to update the loop's schedule type.
 * @param analysis_manager Provides the LoopAnalysis used to find the parent loop.
 * @throws InvalidSDFGException if the parent loop is missing, is not a Map,
 *         is not CUDA-scheduled, or already occupies the Z dimension.
 */
void CUDAParallelizeNestedMap::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();

    // Use a checked cast: `apply` may be invoked without a prior successful
    // `can_be_applied`, and an unchecked downcast of a null or non-Map parent
    // would be undefined behaviour.
    auto* parent_map = dynamic_cast<structured_control_flow::Map*>(loop_analysis.parent_loop(&loop_));
    if (parent_map == nullptr) {
        throw InvalidSDFGException("Parent loop is missing or not a Map, cannot parallelize nested map.");
    }
    if (parent_map->schedule_type().value() != cuda::ScheduleType_CUDA::value()) {
        throw InvalidSDFGException("Parent loop is not a CUDA map, cannot parallelize nested map.");
    }

    const auto parent_dim = cuda::ScheduleType_CUDA::dimension(parent_map->schedule_type());

    // The nested loop takes the dimension right after the parent's.
    cuda::CUDADimension child_dim;
    if (parent_dim == cuda::CUDADimension::X) {
        child_dim = cuda::CUDADimension::Y;
    } else if (parent_dim == cuda::CUDADimension::Y) {
        child_dim = cuda::CUDADimension::Z;
    } else {
        throw InvalidSDFGException("Parent loop is Z dimension, cannot parallelize nested map.");
    }

    auto new_schedule = cuda::ScheduleType_CUDA::create();
    cuda::ScheduleType_CUDA::dimension(new_schedule, child_dim);
    cuda::ScheduleType_CUDA::block_size(new_schedule, symbolic::integer(block_size_));

    builder.update_schedule_type(loop_, new_schedule);
}
5✔
124

125
/// Writes this transformation into `j`: its name under "transformation_type",
/// the block size under "parameters", and the serialized target loop as node
/// "0" of "subgraph".
void CUDAParallelizeNestedMap::to_json(nlohmann::json& j) const {
    j["transformation_type"] = this->name();

    // Tunable parameters of the transformation.
    auto& parameters = j["parameters"];
    parameters = nlohmann::json::object();
    parameters["block_size"] = block_size_;

    // The loop this transformation targets is the single subgraph node.
    auto& subgraph = j["subgraph"];
    subgraph = nlohmann::json::object();
    auto& node_desc = subgraph["0"];
    node_desc = nlohmann::json::object();

    serializer::JSONSerializer flat_serializer(false);
    flat_serializer.serialize_node(node_desc, loop_);
}
1✔
135

136
/// Rebuilds the transformation from a JSON description.
///
/// Reads the target element id from `subgraph["0"]["element_id"]` and the
/// block size from `parameters["block_size"]`; no other layout is consulted.
/// Missing keys surface as the exception thrown by `nlohmann::json::at`.
///
/// @throws InvalidTransformationDescriptionException if the referenced element
///         is not a structured loop.
CUDAParallelizeNestedMap CUDAParallelizeNestedMap::
    from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& j) {
    // Locate the target loop first, then the parameters.
    const auto loop_id = j.at("subgraph").at("0").at("element_id").get<size_t>();
    const auto block_size = j.at("parameters").at("block_size").get<size_t>();

    auto* element = builder.find_element_by_id(loop_id);
    auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(element);
    if (loop == nullptr) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " is not a loop.");
    }

    return CUDAParallelizeNestedMap(*loop, block_size);
}
1✔
151

152
} // namespace transformations
153
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc