• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 27471076166

13 Jun 2026 03:33PM UTC coverage: 61.254% (-0.02%) from 61.274%
27471076166

Pull #760

github

web-flow
Merge d6c6a15bd into db7d71ecc
Pull Request #760: Add hardware limit checks for gpu

20 of 52 new or added lines in 4 files covered. (38.46%)

13 existing lines in 2 files now uncovered.

36267 of 59208 relevant lines covered (61.25%)

1122.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.13
/opt/src/transformations/offloading/cuda_parallelize_nested_map.cpp
1
#include "sdfg/transformations/offloading/cuda_parallelize_nested_map.h"
2

3
#include <sdfg/analysis/loop_analysis.h>
4
#include "sdfg/exceptions.h"
5
#include "sdfg/symbolic/symbolic.h"
6
#include "sdfg/targets/cuda/cuda.h"
7

8
namespace sdfg {
9
namespace transformations {
10

11
/// Creates the transformation for the given map.
///
/// @param loop       Map that this transformation inspects and reschedules.
/// @param block_size Block size that `apply` writes into the new CUDA schedule.
CUDAParallelizeNestedMap::CUDAParallelizeNestedMap(structured_control_flow::Map& loop, size_t block_size)
    : loop_(loop),
      block_size_(block_size) {
}
13✔
13

14
/// Returns the identifier of this transformation; `to_json` stores it under
/// the "transformation_type" key.
std::string CUDAParallelizeNestedMap::name() const {
    return std::string("CUDAParallelizeNestedMap");
}
3✔
15

16
bool CUDAParallelizeNestedMap::
17
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
9✔
18
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();
9✔
19

20
    // Condition: Check if map is not yet parallelized with CUDA
21
    if (loop_.schedule_type().value() != ScheduleType_Sequential::value()) {
9✔
22
        return false;
1✔
23
    }
1✔
24

25
    // Condition: Check if parent loop exists
26
    auto parent = loop_analysis.parent_loop(&loop_);
8✔
27
    if (parent == nullptr) {
8✔
28
        return false;
1✔
29
    }
1✔
30

31
    // Condition: Check if parent loop is a CUDA map, and not Z dimension (final dimension)
32
    if (auto map = dynamic_cast<structured_control_flow::Map*>(parent)) {
7✔
33
        if (map->schedule_type().value() != cuda::ScheduleType_CUDA::value()) {
7✔
34
            return false;
1✔
35
        }
1✔
36
        if (cuda::ScheduleType_CUDA::dimension(map->schedule_type()) == cuda::CUDADimension::Z) {
6✔
37
            return false;
1✔
38
        }
1✔
39
        auto parent_indvar = map->indvar();
5✔
40
        auto ancestor = parent;
5✔
41
        while (ancestor) {
12✔
42
            if (auto map_ancestor = dynamic_cast<structured_control_flow::Map*>(ancestor)) {
7✔
43
                parent_indvar = map_ancestor->indvar();
6✔
44
                for (auto& arg : symbolic::atoms(loop_.condition())) {
8✔
45
                    if (symbolic::eq(arg, parent_indvar)) {
8✔
46
                        return false;
×
47
                    }
×
48
                }
8✔
49
            }
6✔
50
            ancestor = loop_analysis.parent_loop(ancestor);
7✔
51
        }
7✔
52
    } else {
5✔
53
        return false;
×
54
    }
×
55

56
    // Condition: Check if current loop starts from 0
57
    if (!symbolic::eq(loop_.init(), symbolic::zero())) {
5✔
58
        return false;
1✔
59
    }
1✔
60

61
    // Condition: Loop has a stride of 1
62
    auto stride = loop_.stride();
4✔
63
    if (!symbolic::eq(stride, symbolic::one())) {
4✔
64
        return false;
×
65
    }
×
66

67
    // Condition: Resulting CUDA grid dimension must not exceed hardware limits.
68
    // Y and Z grid dimensions are limited to 65535.
69
    auto num_iters = loop_.num_iterations();
4✔
70
    if (!num_iters.is_null() && SymEngine::is_a<SymEngine::Integer>(*num_iters)) {
4✔
71
        int64_t iters = SymEngine::down_cast<const SymEngine::Integer&>(*num_iters).as_int();
2✔
72
        int64_t block = static_cast<int64_t>(block_size_);
2✔
73
        int64_t grid_size = (iters + block - 1) / block;
2✔
74

75
        constexpr int64_t max_grid_dim_yz = 65535;
2✔
76
        if (grid_size > max_grid_dim_yz) {
2✔
NEW
77
            return false;
×
NEW
78
        }
×
79
    }
2✔
80

81
    return true;
4✔
82
}
4✔
83

84
/// Reschedules the map as a CUDA map in the dimension following its parent's.
///
/// A parent in the X dimension yields a Y schedule, a parent in the Y
/// dimension yields a Z schedule. The new schedule carries `block_size_` as
/// its block size and is installed through the builder.
///
/// The parent loop is cast to a map without a check, so this is presumably
/// only meant to be called after `can_be_applied` returned true.
///
/// @param builder          Builder used to update the schedule type of the map.
/// @param analysis_manager Provides the LoopAnalysis used to find the parent loop.
/// @throws InvalidSDFGException if the parent is in neither the X nor the Y dimension.
void CUDAParallelizeNestedMap::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    auto& loops = analysis_manager.get<analysis::LoopAnalysis>();
    auto enclosing_loop = loops.parent_loop(&loop_);
    auto enclosing_map = static_cast<structured_control_flow::Map*>(enclosing_loop);

    const auto enclosing_dim = cuda::ScheduleType_CUDA::dimension(enclosing_map->schedule_type());
    const bool enclosing_is_x = (enclosing_dim == cuda::CUDADimension::X);
    const bool enclosing_is_y = (enclosing_dim == cuda::CUDADimension::Y);

    // Only X and Y have a following dimension available.
    if (!enclosing_is_x && !enclosing_is_y) {
        throw InvalidSDFGException("Parent loop is Z dimension, cannot parallelize nested map.");
    }
    const cuda::CUDADimension nested_dim = enclosing_is_x ? cuda::CUDADimension::Y : cuda::CUDADimension::Z;

    // Assemble the CUDA schedule for the nested map and install it.
    auto cuda_schedule = cuda::ScheduleType_CUDA::create();
    cuda::ScheduleType_CUDA::dimension(cuda_schedule, nested_dim);
    cuda::ScheduleType_CUDA::block_size(cuda_schedule, symbolic::integer(block_size_));

    builder.update_schedule_type(loop_, cuda_schedule);
}
4✔
106

107
/// Serializes this transformation into `j`.
///
/// Writes the transformation name, an embedding-style description
/// ("subgraph" with a single node "0", and "parameters"), and the legacy
/// top-level "loop" and "block_size" fields.
///
/// @param j JSON value that receives the description.
/// @throws std::runtime_error if the target node is a for-loop.
void CUDAParallelizeNestedMap::to_json(nlohmann::json& j) const {
    const bool targets_for_loop = dynamic_cast<structured_control_flow::For*>(&loop_) != nullptr;
    if (targets_for_loop) {
        throw std::runtime_error("CUDAParallelizeNestedMap transformation does not support for-loops.");
    }

    const auto loop_id = loop_.element_id();

    j["transformation_type"] = this->name();

    // Embedding-compatible form (EmbeddingRecorder/EmbeddingReplayer): one
    // subgraph node keyed "0" plus a parameters object.
    nlohmann::json node_desc;
    node_desc["element_id"] = loop_id;
    node_desc["type"] = "map";

    nlohmann::json subgraph;
    subgraph["0"] = node_desc;
    j["subgraph"] = subgraph;

    nlohmann::json parameters;
    parameters["block_size"] = block_size_;
    j["parameters"] = parameters;

    // Legacy fields, kept for backward compatibility.
    j["loop"] = loop_id;
    j["block_size"] = block_size_;
}
1✔
122

123
/// Reconstructs the transformation from its JSON description.
///
/// The element id is read from `subgraph["0"]["element_id"]` when a
/// "subgraph" entry exists, otherwise from the legacy "loop" field. The block
/// size is read from `parameters["block_size"]` when present, otherwise from
/// the legacy top-level "block_size" field.
///
/// @param builder Builder used to look up the element by id.
/// @param j       JSON description of the transformation.
/// @return A transformation for the referenced map with the stored block size.
/// @throws InvalidTransformationDescriptionException if no element has the
///         given id or the element is not a map.
CUDAParallelizeNestedMap CUDAParallelizeNestedMap::
    from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& j) {
    // Prefer the embedding-compatible layout, fall back to the legacy field.
    const size_t loop_id = j.contains("subgraph") ? j.at("subgraph").at("0").at("element_id").get<size_t>()
                                                  : j.at("loop").get<size_t>();

    // Same preference order for the block size.
    const bool has_block_size_parameter = j.contains("parameters") && j.at("parameters").contains("block_size");
    const size_t block_size = has_block_size_parameter ? j.at("parameters").at("block_size").get<size_t>()
                                                       : j.at("block_size").get<size_t>();

    auto element = builder.find_element_by_id(loop_id);
    if (element == nullptr) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }

    auto map_node = dynamic_cast<structured_control_flow::Map*>(element);
    if (map_node == nullptr) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " is not a loop.");
    }

    return CUDAParallelizeNestedMap(*map_node, block_size);
}
1✔
152

153
} // namespace transformations
154
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc