• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

daisytuner / docc / 28106147644

24 Jun 2026 02:32PM UTC coverage: 61.922% (+0.1%) from 61.779%
28106147644

Pull #806

github

web-flow
Merge 2be414d54 into 57cc1db99
Pull Request #806: Map Collapse for Multiple targets in a nested sequence

165 of 185 new or added lines in 2 files covered. (89.19%)

419 existing lines in 30 files now uncovered.

37705 of 60891 relevant lines covered (61.92%)

1004.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.48
/opt/src/transformations/offloading/cuda_parallelize_nested_map.cpp
1
#include "sdfg/transformations/offloading/cuda_parallelize_nested_map.h"
2

3
#include <sdfg/analysis/loop_analysis.h>
4
#include "sdfg/exceptions.h"
5
#include "sdfg/symbolic/symbolic.h"
6
#include "sdfg/targets/cuda/cuda.h"
7

8
namespace sdfg {
9
namespace transformations {
10

11
// Constructs the transformation for the map `loop`, storing it in `loop_` and
// storing `block_size` in `block_size_`. Both members are read later by
// can_be_applied(), apply() and to_json().
CUDAParallelizeNestedMap::CUDAParallelizeNestedMap(structured_control_flow::Map& loop, size_t block_size)
12
    : loop_(loop), block_size_(block_size) {}
16✔
13

14
// Returns the fixed identifier of this transformation. to_json() writes the
// same string under the "transformation_type" key.
std::string CUDAParallelizeNestedMap::name() const { return "CUDAParallelizeNestedMap"; }
3✔
15

16
// Decides whether `loop_` may be turned into a nested CUDA map.
//
// Returns false when any of the following holds:
//   - `loop_` does not have the sequential schedule type;
//   - the loop analysis reports no parent loop for `loop_`;
//   - the parent is not a Map, is not CUDA-scheduled, or already uses the Z dimension;
//   - the condition of `loop_` contains the induction variable of any enclosing Map;
//   - the iteration count is a constant integer and the resulting grid size
//     (iterations divided by `block_size_`, rounded up) exceeds 65535.
// Returns true otherwise.
//
// `builder` is not used in this function; `analysis_manager` supplies the LoopAnalysis.
bool CUDAParallelizeNestedMap::
17
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
12✔
18
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();
12✔
19

20
    // Condition: Check if map is not yet parallelized with CUDA
21
    if (loop_.schedule_type().value() != ScheduleType_Sequential::value()) {
12✔
22
        return false;
1✔
23
    }
1✔
24

25
    // Condition: Check if parent loop exists
26
    auto parent = loop_analysis.parent_loop(&loop_);
11✔
27
    if (parent == nullptr) {
11✔
28
        return false;
1✔
29
    }
1✔
30

31
    // Condition: Check if parent loop is a CUDA map, and not Z dimension (final dimension)
32
    if (auto map = dynamic_cast<structured_control_flow::Map*>(parent)) {
10✔
33
        if (map->schedule_type().value() != cuda::ScheduleType_CUDA::value()) {
10✔
34
            return false;
1✔
35
        }
1✔
36
        if (cuda::ScheduleType_CUDA::dimension(map->schedule_type()) == cuda::CUDADimension::Z) {
9✔
37
            return false;
1✔
38
        }
1✔
39
        // Condition: the condition of `loop_` must not mention the induction
        // variable of any enclosing Map. The walk starts at the direct parent and
        // follows parent_loop() until it returns null; non-Map ancestors are skipped.
        // NOTE(review): the value assigned on the next line is overwritten on the
        // first pass of the loop, because `ancestor` starts at `parent`, which is
        // this same Map.
        auto parent_indvar = map->indvar();
8✔
40
        auto ancestor = parent;
8✔
41
        while (ancestor) {
18✔
42
            if (auto map_ancestor = dynamic_cast<structured_control_flow::Map*>(ancestor)) {
10✔
43
                parent_indvar = map_ancestor->indvar();
9✔
44
                for (auto& arg : symbolic::atoms(loop_.condition())) {
11✔
45
                    if (symbolic::eq(arg, parent_indvar)) {
11✔
46
                        return false;
×
47
                    }
×
48
                }
11✔
49
            }
9✔
50
            ancestor = loop_analysis.parent_loop(ancestor);
10✔
51
        }
10✔
52
    } else {
8✔
53
        return false;
×
54
    }
×
55

56
    // Note: arbitrary `init` and `stride` are permitted. The CUDA dispatcher
57
    // emits `<map.indvar> = init + thread_flat_id * stride`, so the body sees
58
    // the natural strided value; `num_iterations()` accounts for both when
59
    // computing the grid geometry.
60

61
    // Condition: Resulting CUDA grid dimension must not exceed hardware limits.
62
    // Y and Z grid dimensions are limited to 65535.
63
    auto num_iters = loop_.num_iterations();
8✔
64
    // Only a constant (SymEngine::Integer) iteration count is checked; a null or
    // non-constant count skips the limit test and falls through to `return true`.
    if (!num_iters.is_null() && SymEngine::is_a<SymEngine::Integer>(*num_iters)) {
8✔
65
        int64_t iters = SymEngine::down_cast<const SymEngine::Integer&>(*num_iters).as_int();
6✔
66
        int64_t block = static_cast<int64_t>(block_size_);
6✔
67
        // Ceiling division: number of blocks needed to cover `iters` iterations.
        // NOTE(review): `block_size_ == 0` makes this a division by zero, and a
        // very large `iters` can overflow the signed addition; no guard is
        // visible in this function.
        int64_t grid_size = (iters + block - 1) / block;
6✔
68

69
        constexpr int64_t max_grid_dim_yz = 65535;
6✔
70
        if (grid_size > max_grid_dim_yz) {
6✔
71
            return false;
1✔
72
        }
1✔
73
    }
6✔
74

75
    return true;
7✔
76
}
8✔
77

78
// Re-schedules `loop_` as a CUDA map on the dimension that follows its parent's:
// a parent on X yields Y, a parent on Y yields Z. The new schedule carries
// `block_size_` as its block size and is installed through
// `builder.update_schedule_type`.
//
// Throws InvalidSDFGException when the parent's dimension is neither X nor Y.
//
// NOTE(review): the parent returned by the loop analysis is static_cast to Map
// and dereferenced without a null or type check; presumably callers run
// can_be_applied() first — confirm.
void CUDAParallelizeNestedMap::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
5✔
79
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();
5✔
80
    auto parent = loop_analysis.parent_loop(&loop_);
5✔
81

82
    auto parent_dim =
5✔
83
        cuda::ScheduleType_CUDA::dimension(static_cast<structured_control_flow::Map*>(parent)->schedule_type());
5✔
84

85
    // Pick the dimension for `loop_` from the parent's dimension.
    cuda::CUDADimension child_dim;
5✔
86
    if (parent_dim == cuda::CUDADimension::X) {
5✔
87
        child_dim = cuda::CUDADimension::Y;
4✔
88
    } else if (parent_dim == cuda::CUDADimension::Y) {
4✔
89
        child_dim = cuda::CUDADimension::Z;
1✔
90
    } else {
1✔
91
        throw InvalidSDFGException("Parent loop is Z dimension, cannot parallelize nested map.");
×
92
    }
×
93

94
    // Build the CUDA schedule (dimension + block size) and attach it to `loop_`.
    auto new_schedule = cuda::ScheduleType_CUDA::create();
5✔
95
    cuda::ScheduleType_CUDA::dimension(new_schedule, child_dim);
5✔
96
    cuda::ScheduleType_CUDA::block_size(new_schedule, symbolic::integer(block_size_));
5✔
97

98
    builder.update_schedule_type(loop_, new_schedule);
5✔
99
}
5✔
100

101
// Serializes this transformation into `j`. Keys written:
//   "transformation_type"       - the value of name()
//   "parameters"."block_size"   - `block_size_`
//   "subgraph"."0"              - `loop_`, as produced by JSONSerializer::serialize_node
// from_json() reads the same "parameters" and "subgraph" keys back.
void CUDAParallelizeNestedMap::to_json(nlohmann::json& j) const {
1✔
102
    j["transformation_type"] = this->name();
1✔
103
    j["parameters"] = nlohmann::json::object();
1✔
104
    j["parameters"]["block_size"] = block_size_;
1✔
105

106
    // NOTE(review): the meaning of the `false` constructor argument is not
    // visible here; the variable name suggests a flat serialization — confirm.
    serializer::JSONSerializer ser_flat(false);
1✔
107
    j["subgraph"] = nlohmann::json::object();
1✔
108
    j["subgraph"]["0"] = nlohmann::json::object();
1✔
109
    ser_flat.serialize_node(j["subgraph"]["0"], loop_);
1✔
110
}
1✔
111

112
// Rebuilds a transformation from the JSON description `j`.
//
// Reads the target element id from j["subgraph"]["0"]["element_id"] and the
// block size from j["parameters"]["block_size"], looks the element up through
// `builder.find_element_by_id`, and requires it to be a Map.
//
// Throws InvalidTransformationDescriptionException when the looked-up element
// does not dynamic_cast to Map. Every JSON lookup uses `at()`, which in
// nlohmann::json throws on a missing key instead of inserting a default.
CUDAParallelizeNestedMap CUDAParallelizeNestedMap::
113
    from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& j) {
1✔
114
    // Read the embedding-compatible representation (subgraph/parameters).
115
    // NOTE(review): no fallback to legacy fields (loop/block_size) is implemented below.
116
    const auto& subgraph = j.at("subgraph");
1✔
117
    const auto& node_desc = subgraph.at("0");
1✔
118
    size_t loop_id = node_desc.at("element_id").get<size_t>();
1✔
119

120
    size_t block_size = j.at("parameters").at("block_size").get<size_t>();
1✔
121
    // The cast targets Map specifically, so the check below also rejects
    // elements that are loops of another kind, although the message says "not a loop".
    auto loop = dynamic_cast<structured_control_flow::Map*>(builder.find_element_by_id(loop_id));
1✔
122
    if (!loop) {
1✔
UNCOV
123
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " is not a loop.");
×
UNCOV
124
    }
×
125
    return CUDAParallelizeNestedMap(*loop, block_size);
1✔
126
}
1✔
127

128
} // namespace transformations
129
} // namespace sdfg
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc