• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28303955550

27 Jun 2026 10:38PM UTC coverage: 61.924% (+0.2%) from 61.754%
28303955550

Pull #814

github

web-flow
Merge 89b94697f into 8322f5994
Pull Request #814: Adds GPU reduce dispatchers

568 of 859 new or added lines in 16 files covered. (66.12%)

3 existing lines in 1 file now uncovered.

39450 of 63707 relevant lines covered (61.92%)

967.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

35.64
/opt/src/transformations/offloading/rocm_parallelize_nested_map.cpp
1
#include "sdfg/transformations/offloading/rocm_parallelize_nested_map.h"
2

3
#include <sdfg/analysis/loop_analysis.h>
4
#include "sdfg/exceptions.h"
5
#include "sdfg/structured_control_flow/reduce.h"
6
#include "sdfg/symbolic/symbolic.h"
7
#include "sdfg/targets/rocm/rocm.h"
8
#include "sdfg/types/pointer.h"
9
#include "sdfg/types/scalar.h"
10

11
namespace sdfg {
12
namespace transformations {
13

14
// Constructs the transformation for a given loop.
//
// `loop` initializes the `loop_` member (the node this transformation
// operates on) and `block_size` initializes `block_size_`, which is later
// written into the ROCm schedule as the block size of the new dimension.
ROCMParallelizeNestedMap::ROCMParallelizeNestedMap(
    structured_control_flow::StructuredLoop& loop,
    size_t block_size
)
    : loop_(loop),
      block_size_(block_size) {}
2✔
16

17
// Returns the identifier of this transformation. The same string is written
// to the "transformation_type" field by to_json().
std::string ROCMParallelizeNestedMap::name() const {
    return "ROCMParallelizeNestedMap";
}
×
18

19
bool ROCMParallelizeNestedMap::
20
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
2✔
21
    if (dynamic_cast<structured_control_flow::Map*>(&loop_) == nullptr &&
2✔
22
        dynamic_cast<structured_control_flow::Reduce*>(&loop_) == nullptr) {
2✔
NEW
23
        throw InvalidTransformationDescriptionException(
×
NEW
24
            "CUDAParallelizeNestedMap: can only parallelize Map or Reduce nodes."
×
NEW
25
        );
×
NEW
26
    }
×
27
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();
2✔
28

29
    // Condition: Check if map is not yet parallelized with ROCM
30
    if (loop_.schedule_type().value() != ScheduleType_Sequential::value()) {
2✔
31
        return false;
×
32
    }
×
33

34
    // Condition: a nested Reduce can only be offloaded when every accumulator is
35
    // a device-resident pointer to a scalar.
36
    if (auto* reduce = dynamic_cast<structured_control_flow::Reduce*>(&loop_)) {
2✔
NEW
37
        auto& sdfg = builder.subject();
×
NEW
38
        for (auto& reduction : reduce->reductions()) {
×
NEW
39
            auto& type = sdfg.type(reduction.container);
×
NEW
40
            auto* ptr = dynamic_cast<const types::Pointer*>(&type);
×
NEW
41
            if (ptr == nullptr || !ptr->has_pointee_type() ||
×
NEW
42
                dynamic_cast<const types::Scalar*>(&ptr->pointee_type()) == nullptr) {
×
NEW
43
                return false;
×
NEW
44
            }
×
NEW
45
        }
×
NEW
46
    }
×
47

48
    // Condition: Check if parent loop exists
49
    auto parent = loop_analysis.parent_loop(&loop_);
2✔
50
    if (parent == nullptr) {
2✔
51
        return false;
×
52
    }
×
53

54
    // Condition: Check if parent loop is a ROCM map, and not Z dimension (final dimension)
55
    if (auto map = dynamic_cast<structured_control_flow::Map*>(parent)) {
2✔
56
        if (map->schedule_type().value() != rocm::ScheduleType_ROCM::value()) {
2✔
57
            return false;
×
58
        }
×
59
        if (rocm::ScheduleType_ROCM::dimension(map->schedule_type()) == rocm::ROCMDimension::Z) {
2✔
60
            return false;
×
61
        }
×
62
        auto parent_indvar = map->indvar();
2✔
63
        auto ancestor = parent;
2✔
64
        while (ancestor) {
4✔
65
            if (auto map_ancestor = dynamic_cast<structured_control_flow::Map*>(ancestor)) {
2✔
66
                parent_indvar = map_ancestor->indvar();
2✔
67
                for (auto& arg : symbolic::atoms(loop_.condition())) {
2✔
68
                    if (symbolic::eq(arg, parent_indvar)) {
2✔
69
                        return false;
×
70
                    }
×
71
                }
2✔
72
            }
2✔
73
            ancestor = loop_analysis.parent_loop(ancestor);
2✔
74
        }
2✔
75
    } else {
2✔
76
        return false;
×
77
    }
×
78

79
    // Note: arbitrary `init` and `stride` are permitted. The ROCm dispatcher
80
    // emits `<map.indvar> = init + thread_flat_id * stride`, so the body sees
81
    // the natural strided value; `num_iterations()` accounts for both when
82
    // computing the grid geometry.
83

84
    // Condition: Resulting ROCm grid dimension must not exceed hardware limits.
85
    // Y and Z grid dimensions are limited to 65535.
86
    auto num_iters = loop_.num_iterations();
2✔
87
    if (!num_iters.is_null() && SymEngine::is_a<SymEngine::Integer>(*num_iters)) {
2✔
88
        int64_t iters = SymEngine::down_cast<const SymEngine::Integer&>(*num_iters).as_int();
2✔
89
        int64_t block = static_cast<int64_t>(block_size_);
2✔
90
        int64_t grid_size = (iters + block - 1) / block;
2✔
91

92
        constexpr int64_t max_grid_dim_yz = 65535;
2✔
93
        if (grid_size > max_grid_dim_yz) {
2✔
94
            return false;
1✔
95
        }
1✔
96
    }
2✔
97

98
    return true;
1✔
99
}
2✔
100

101
// Assigns `loop_` a ROCM schedule on the grid dimension directly below the
// one used by its parent map (X -> Y, Y -> Z) with `block_size_` as block
// size.
//
// Throws InvalidSDFGException when the parent loop is missing or is not a
// Map, or when the parent already occupies the Z dimension. These cases are
// rejected by can_be_applied(); they are re-checked here so that calling
// apply() without that precondition fails loudly instead of dereferencing an
// invalid pointer.
void ROCMParallelizeNestedMap::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();

    // Checked downcast: yields nullptr for a missing parent as well as for a
    // parent that is not a Map.
    auto* parent_map = dynamic_cast<structured_control_flow::Map*>(loop_analysis.parent_loop(&loop_));
    if (parent_map == nullptr) {
        throw InvalidSDFGException("Parent loop is missing or not a map, cannot parallelize nested map.");
    }

    const auto parent_dim = rocm::ScheduleType_ROCM::dimension(parent_map->schedule_type());

    // The nested loop takes the next dimension after its parent.
    rocm::ROCMDimension child_dim;
    switch (parent_dim) {
        case rocm::ROCMDimension::X:
            child_dim = rocm::ROCMDimension::Y;
            break;
        case rocm::ROCMDimension::Y:
            child_dim = rocm::ROCMDimension::Z;
            break;
        default:
            throw InvalidSDFGException("Parent loop is Z dimension, cannot parallelize nested map.");
    }

    auto new_schedule = rocm::ScheduleType_ROCM::create();
    rocm::ScheduleType_ROCM::dimension(new_schedule, child_dim);
    rocm::ScheduleType_ROCM::block_size(new_schedule, symbolic::integer(block_size_));

    builder.update_schedule_type(loop_, new_schedule);
}
×
123

124
// Writes a description of this transformation into `j`:
//   "transformation_type"      - the value of name()
//   "parameters"."block_size"  - the configured block size
//   "subgraph"."0"             - `loop_` as emitted by
//                                JSONSerializer::serialize_node
void ROCMParallelizeNestedMap::to_json(nlohmann::json& j) const {
    j["transformation_type"] = this->name();

    auto& parameters = j["parameters"];
    parameters = nlohmann::json::object();
    parameters["block_size"] = block_size_;

    // The serializer is constructed with `false`, exactly as before; the
    // meaning of that flag is defined by JSONSerializer.
    serializer::JSONSerializer flat_serializer(false);

    auto& subgraph = j["subgraph"];
    subgraph = nlohmann::json::object();

    auto& loop_entry = subgraph["0"];
    loop_entry = nlohmann::json::object();
    flat_serializer.serialize_node(loop_entry, loop_);
}
×
134

135
// Rebuilds the transformation from the layout written by to_json().
//
// Reads the loop's element id from "subgraph"."0"."element_id" and the block
// size from "parameters"."block_size"; no other layout is consulted. Missing
// keys surface as the exception thrown by nlohmann::json::at().
//
// Throws InvalidTransformationDescriptionException when the element with the
// given id cannot be cast to a StructuredLoop (this includes a null lookup
// result).
ROCMParallelizeNestedMap ROCMParallelizeNestedMap::
    from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& j) {
    const size_t loop_id = j.at("subgraph").at("0").at("element_id").get<size_t>();
    const size_t block_size = j.at("parameters").at("block_size").get<size_t>();

    auto* element = builder.find_element_by_id(loop_id);
    auto* loop = dynamic_cast<structured_control_flow::StructuredLoop*>(element);
    if (loop == nullptr) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " is not a loop.");
    }

    return ROCMParallelizeNestedMap(*loop, block_size);
}
×
150

151
} // namespace transformations
152
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc