27471076166

Committed 13 Jun 2026 03:33PM UTC coverage: 61.254% (-0.02%) from 61.274%

Build # 27471076166

Build Type

Pull #760

github

Committed by

web-flow

Commit Message

Merge d6c6a15bd into db7d71ecc

Pull Request Pull Request #760: Add hardware limit checks for gpu

Coverage Stats

20 of 52 new or added lines in 4 files covered. (38.46%)

13 existing lines in 2 files now uncovered.

36267 of 59208 relevant lines covered (61.25%)

1122.95 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

81.13

/opt/src/transformations/offloading/cuda_parallelize_nested_map.cpp

#include "sdfg/transformations/offloading/cuda_parallelize_nested_map.h"

#include <sdfg/analysis/loop_analysis.h>
#include "sdfg/exceptions.h"
#include "sdfg/symbolic/symbolic.h"
#include "sdfg/targets/cuda/cuda.h"

namespace sdfg {
namespace transformations {

// Binds the transformation to the map `loop` and remembers `block_size`, which `apply`
// writes into the CUDA schedule as the block size of the new dimension.
CUDAParallelizeNestedMap::CUDAParallelizeNestedMap(structured_control_flow::Map& loop, size_t block_size)
    : loop_(loop),
      block_size_(block_size) {
}

// Identifier of this transformation; `to_json` stores it under "transformation_type".
std::string CUDAParallelizeNestedMap::name() const {
    return std::string("CUDAParallelizeNestedMap");
}

// Decides whether the map can be turned into an additional CUDA dimension nested in its parent.
//
// Returns true only if all of the following hold:
//  - the block size is non-zero,
//  - the map is still scheduled sequentially,
//  - its parent loop exists, is a CUDA-scheduled map and does not already use the Z dimension,
//  - the map's condition does not mention the induction variable of any enclosing map,
//  - the map starts at 0 and has stride 1,
//  - if the iteration count is a known integer, the resulting grid size fits into the
//    65535 limit applied to the Y/Z grid dimensions.
//
// `builder` is not used by the check.
bool CUDAParallelizeNestedMap::
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();

    // Condition: Block size must be positive; zero would also divide by zero in the grid-size check below
    if (block_size_ == 0) {
        return false;
    }

    // Condition: Check if map is not yet parallelized with CUDA
    if (loop_.schedule_type().value() != ScheduleType_Sequential::value()) {
        return false;
    }

    // Condition: Check if parent loop exists
    auto parent = loop_analysis.parent_loop(&loop_);
    if (parent == nullptr) {
        return false;
    }

    // Condition: Check if parent loop is a CUDA map, and not Z dimension (final dimension)
    auto parent_map = dynamic_cast<structured_control_flow::Map*>(parent);
    if (parent_map == nullptr) {
        return false;
    }
    if (parent_map->schedule_type().value() != cuda::ScheduleType_CUDA::value()) {
        return false;
    }
    if (cuda::ScheduleType_CUDA::dimension(parent_map->schedule_type()) == cuda::CUDADimension::Z) {
        return false;
    }

    // Condition: The loop bound must not depend on the induction variable of any enclosing map.
    // The atoms of the condition do not change while walking up, so collect them once.
    const auto condition_atoms = symbolic::atoms(loop_.condition());
    for (auto ancestor = parent; ancestor != nullptr; ancestor = loop_analysis.parent_loop(ancestor)) {
        auto map_ancestor = dynamic_cast<structured_control_flow::Map*>(ancestor);
        if (map_ancestor == nullptr) {
            continue;
        }
        const auto ancestor_indvar = map_ancestor->indvar();
        for (const auto& atom : condition_atoms) {
            if (symbolic::eq(atom, ancestor_indvar)) {
                return false;
            }
        }
    }

    // Condition: Check if current loop starts from 0
    if (!symbolic::eq(loop_.init(), symbolic::zero())) {
        return false;
    }

    // Condition: Loop has a stride of 1
    if (!symbolic::eq(loop_.stride(), symbolic::one())) {
        return false;
    }

    // Condition: Resulting CUDA grid dimension must not exceed hardware limits.
    // Y and Z grid dimensions are limited to 65535.
    // Only checked when the iteration count is a concrete integer.
    auto num_iters = loop_.num_iterations();
    if (!num_iters.is_null() && SymEngine::is_a<SymEngine::Integer>(*num_iters)) {
        const int64_t iters = SymEngine::down_cast<const SymEngine::Integer&>(*num_iters).as_int();

        // A non-positive iteration count needs no blocks and can never exceed the limit.
        if (iters > 0) {
            constexpr unsigned long long max_grid_dim_yz = 65535;
            const auto total = static_cast<unsigned long long>(iters);
            const auto block = static_cast<unsigned long long>(block_size_);

            // ceil(total / block) written so that it cannot overflow for large iteration counts
            const auto grid_size = (total - 1) / block + 1;
            if (grid_size > max_grid_dim_yz) {
                return false;
            }
        }
    }

    return true;
}

// Schedules the map as the next CUDA dimension below its parent map.
//
// The parent's CUDA dimension selects the child's: X -> Y, Y -> Z. The new schedule gets
// `block_size_` as its block size and is installed on the map through `builder`.
//
// Throws InvalidSDFGException if the parent loop is missing, is not a map, is not scheduled
// with CUDA, or already uses the Z dimension.
void CUDAParallelizeNestedMap::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();

    // Checked cast: a missing or non-map parent must not be dereferenced.
    auto parent_map = dynamic_cast<structured_control_flow::Map*>(loop_analysis.parent_loop(&loop_));
    if (parent_map == nullptr) {
        throw InvalidSDFGException("Parent loop is not a map, cannot parallelize nested map.");
    }
    if (parent_map->schedule_type().value() != cuda::ScheduleType_CUDA::value()) {
        throw InvalidSDFGException("Parent map is not a CUDA map, cannot parallelize nested map.");
    }

    const auto parent_dim = cuda::ScheduleType_CUDA::dimension(parent_map->schedule_type());

    // The nested map takes the dimension following the parent's.
    cuda::CUDADimension child_dim;
    if (parent_dim == cuda::CUDADimension::X) {
        child_dim = cuda::CUDADimension::Y;
    } else if (parent_dim == cuda::CUDADimension::Y) {
        child_dim = cuda::CUDADimension::Z;
    } else {
        throw InvalidSDFGException("Parent loop is Z dimension, cannot parallelize nested map.");
    }

    auto new_schedule = cuda::ScheduleType_CUDA::create();
    cuda::ScheduleType_CUDA::dimension(new_schedule, child_dim);
    cuda::ScheduleType_CUDA::block_size(new_schedule, symbolic::integer(block_size_));

    builder.update_schedule_type(loop_, new_schedule);
}

// Serializes the transformation into `j`.
//
// Written fields:
//  - "transformation_type": the transformation name,
//  - "subgraph" / "parameters": description in the form used by EmbeddingRecorder/EmbeddingReplayer,
//  - "loop" / "block_size": the same information in the older flat layout, kept for
//    backward compatibility.
//
// Throws std::runtime_error if the wrapped loop turns out to be a for-loop.
void CUDAParallelizeNestedMap::to_json(nlohmann::json& j) const {
    const bool wraps_for_loop = dynamic_cast<structured_control_flow::For*>(&loop_) != nullptr;
    if (wraps_for_loop) {
        throw std::runtime_error("CUDAParallelizeNestedMap transformation does not support for-loops.");
    }

    j["transformation_type"] = name();

    // Embedding-compatible description: a single node "0" that refers to the map.
    nlohmann::json node_desc = nlohmann::json::object();
    node_desc["element_id"] = loop_.element_id();
    node_desc["type"] = "map";

    nlohmann::json subgraph = nlohmann::json::object();
    subgraph["0"] = node_desc;
    j["subgraph"] = subgraph;

    nlohmann::json parameters = nlohmann::json::object();
    parameters["block_size"] = block_size_;
    j["parameters"] = parameters;

    // Flat fields duplicated for backward compatibility.
    j["loop"] = loop_.element_id();
    j["block_size"] = block_size_;
}

// Rebuilds the transformation from its JSON description.
//
// The loop id is read from "subgraph"."0"."element_id" when "subgraph" is present, otherwise
// from the flat "loop" field. The block size is read from "parameters"."block_size" when
// available, otherwise from the flat "block_size" field.
//
// Throws InvalidTransformationDescriptionException if no element with that id exists in
// `builder` or if the element is not a map.
CUDAParallelizeNestedMap CUDAParallelizeNestedMap::
    from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& j) {
    // Loop id: embedding-compatible layout first, flat field as fallback.
    const size_t loop_id = j.contains("subgraph") ? j.at("subgraph").at("0").at("element_id").get<size_t>()
                                                  : j.at("loop").get<size_t>();

    // Block size: taken from "parameters" when it carries the key, else from the top level.
    const bool in_parameters = j.contains("parameters") && j.at("parameters").contains("block_size");
    const nlohmann::json& block_size_holder = in_parameters ? j.at("parameters") : j;
    const size_t block_size = block_size_holder.at("block_size").get<size_t>();

    auto element = builder.find_element_by_id(loop_id);
    if (element == nullptr) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");
    }

    auto map_node = dynamic_cast<structured_control_flow::Map*>(element);
    if (map_node == nullptr) {
        throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " is not a loop.");
    }

    return CUDAParallelizeNestedMap(*map_node, block_size);
}

} // namespace transformations
} // namespace sdfg

1	#include "sdfg/transformations/offloading/cuda_parallelize_nested_map.h"
2
3	#include <sdfg/analysis/loop_analysis.h>
4	#include "sdfg/exceptions.h"
5	#include "sdfg/symbolic/symbolic.h"
6	#include "sdfg/targets/cuda/cuda.h"
7
8	namespace sdfg {
9	namespace transformations {
10
11	CUDAParallelizeNestedMap::CUDAParallelizeNestedMap(structured_control_flow::Map& loop, size_t block_size)
12	: loop_(loop), block_size_(block_size) {}	13✔
13
14	std::string CUDAParallelizeNestedMap::name() const { return "CUDAParallelizeNestedMap"; }	3✔
15
16	bool CUDAParallelizeNestedMap::
17	can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {	9✔
18	auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();	9✔
19
20	// Condition: Check if map is not yet parallelized with CUDA
21	if (loop_.schedule_type().value() != ScheduleType_Sequential::value()) {	9✔
22	return false;	1✔
23	}	1✔
24
25	// Condition: Check if parent loop exists
26	auto parent = loop_analysis.parent_loop(&loop_);	8✔
27	if (parent == nullptr) {	8✔
28	return false;	1✔
29	}	1✔
30
31	// Condition: Check if parent loop is a CUDA map, and not Z dimension (final dimension)
32	if (auto map = dynamic_cast<structured_control_flow::Map*>(parent)) {	7✔
33	if (map->schedule_type().value() != cuda::ScheduleType_CUDA::value()) {	7✔
34	return false;	1✔
35	}	1✔
36	if (cuda::ScheduleType_CUDA::dimension(map->schedule_type()) == cuda::CUDADimension::Z) {	6✔
37	return false;	1✔
38	}	1✔
39	auto parent_indvar = map->indvar();	5✔
40	auto ancestor = parent;	5✔
41	while (ancestor) {	12✔
42	if (auto map_ancestor = dynamic_cast<structured_control_flow::Map*>(ancestor)) {	7✔
43	parent_indvar = map_ancestor->indvar();	6✔
44	for (auto& arg : symbolic::atoms(loop_.condition())) {	8✔
45	if (symbolic::eq(arg, parent_indvar)) {	8✔
46	return false;	×
47	}	×
48	}	8✔
49	}	6✔
50	ancestor = loop_analysis.parent_loop(ancestor);	7✔
51	}	7✔
52	} else {	5✔
53	return false;	×
54	}	×
55
56	// Condition: Check if current loop starts from 0
57	if (!symbolic::eq(loop_.init(), symbolic::zero())) {	5✔
58	return false;	1✔
59	}	1✔
60
61	// Condition: Loop has a stride of 1
62	auto stride = loop_.stride();	4✔
63	if (!symbolic::eq(stride, symbolic::one())) {	4✔
64	return false;	×
65	}	×
66
67	// Condition: Resulting CUDA grid dimension must not exceed hardware limits.
68	// Y and Z grid dimensions are limited to 65535.
69	auto num_iters = loop_.num_iterations();	4✔
70	if (!num_iters.is_null() && SymEngine::is_a<SymEngine::Integer>(*num_iters)) {	4✔
71	int64_t iters = SymEngine::down_cast<const SymEngine::Integer&>(*num_iters).as_int();	2✔
72	int64_t block = static_cast<int64_t>(block_size_);	2✔
73	int64_t grid_size = (iters + block - 1) / block;	2✔
74
75	constexpr int64_t max_grid_dim_yz = 65535;	2✔
76	if (grid_size > max_grid_dim_yz) {	2✔
NEW 77	return false;	×
NEW 78	}	×
79	}	2✔
80
81	return true;	4✔
82	}	4✔
83
84	void CUDAParallelizeNestedMap::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {	4✔
85	auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();	4✔
86	auto parent = loop_analysis.parent_loop(&loop_);	4✔
87
88	auto parent_dim =	4✔
89	cuda::ScheduleType_CUDA::dimension(static_cast<structured_control_flow::Map*>(parent)->schedule_type());	4✔
90
91	cuda::CUDADimension child_dim;	4✔
92	if (parent_dim == cuda::CUDADimension::X) {	4✔
93	child_dim = cuda::CUDADimension::Y;	3✔
94	} else if (parent_dim == cuda::CUDADimension::Y) {	3✔
95	child_dim = cuda::CUDADimension::Z;	1✔
96	} else {	1✔
97	throw InvalidSDFGException("Parent loop is Z dimension, cannot parallelize nested map.");	×
98	}	×
99
100	auto new_schedule = cuda::ScheduleType_CUDA::create();	4✔
101	cuda::ScheduleType_CUDA::dimension(new_schedule, child_dim);	4✔
102	cuda::ScheduleType_CUDA::block_size(new_schedule, symbolic::integer(block_size_));	4✔
103
104	builder.update_schedule_type(loop_, new_schedule);	4✔
105	}	4✔
106
107	void CUDAParallelizeNestedMap::to_json(nlohmann::json& j) const {	1✔
108	if (dynamic_cast<structured_control_flow::For*>(&loop_)) {	1✔
109	throw std::runtime_error("CUDAParallelizeNestedMap transformation does not support for-loops.");	×
110	}	×
111	j["transformation_type"] = this->name();	1✔
112
113	// Describe the subgraph in a form compatible with EmbeddingRecorder/EmbeddingReplayer.
114	// Keep the existing "loop" and "block_size" fields for backward compatibility.
115	j["subgraph"] = {{"0", {{"element_id", loop_.element_id()}, {"type", "map"}}}};	1✔
116
117	j["parameters"] = {{"block_size", block_size_}};	1✔
118
119	j["loop"] = loop_.element_id();	1✔
120	j["block_size"] = block_size_;	1✔
121	}	1✔
122
123	CUDAParallelizeNestedMap CUDAParallelizeNestedMap::
124	from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& j) {	1✔
125	// Prefer the embedding-compatible representation (subgraph/parameters),
126	// but fall back to legacy fields (loop/block_size) if needed.
127	size_t loop_id;	1✔
128	if (j.contains("subgraph")) {	1✔
129	const auto& subgraph = j.at("subgraph");	1✔
130	const auto& node_desc = subgraph.at("0");	1✔
131	loop_id = node_desc.at("element_id").get<size_t>();	1✔
132	} else {	1✔
133	loop_id = j.at("loop").get<size_t>();	×
134	}	×
135
136	size_t block_size;	1✔
137	if (j.contains("parameters") && j.at("parameters").contains("block_size")) {	1✔
138	block_size = j.at("parameters").at("block_size").get<size_t>();	1✔
139	} else {	1✔
140	block_size = j.at("block_size").get<size_t>();	×
141	}	×
142	auto element = builder.find_element_by_id(loop_id);	1✔
143	if (!element) {	1✔
144	throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " not found.");	×
145	}	×
146	auto loop = dynamic_cast<structured_control_flow::Map*>(element);	1✔
147	if (!loop) {	1✔
148	throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " is not a loop.");	×
149	}	×
150	return CUDAParallelizeNestedMap(*loop, block_size);	1✔
151	}	1✔
152
153	} // namespace transformations
154	} // namespace sdfg

daisytuner / docc / 27471076166

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous