28106147644

Committed 24 Jun 2026 02:32PM UTC coverage: 61.922% (+0.1%) from 61.779%

Build # 28106147644

Build Type

Pull #806

github

Committed by

web-flow

Commit Message

Merge 2be414d54 into 57cc1db99

Pull Request Pull Request #806: Map Collapse for Multiple targets in a nested sequence

Coverage Stats

165 of 185 new or added lines in 2 files covered. (89.19%)

419 existing lines in 30 files now uncovered.

37705 of 60891 relevant lines covered (61.92%)

1004.4 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.48

/opt/src/transformations/offloading/cuda_parallelize_nested_map.cpp

#include "sdfg/transformations/offloading/cuda_parallelize_nested_map.h"

#include <sdfg/analysis/loop_analysis.h>
#include "sdfg/exceptions.h"
#include "sdfg/symbolic/symbolic.h"
#include "sdfg/targets/cuda/cuda.h"

namespace sdfg {
namespace transformations {

// Binds the transformation to the map `loop` and remembers `block_size`,
// which `apply` later writes into the map's new CUDA schedule.
CUDAParallelizeNestedMap::CUDAParallelizeNestedMap(structured_control_flow::Map& loop, size_t block_size)
    : loop_(loop),
      block_size_(block_size) {
}

// Returns the identifier of this transformation; `to_json` stores it under
// the "transformation_type" key.
std::string CUDAParallelizeNestedMap::name() const {
    return std::string("CUDAParallelizeNestedMap");
}

// Decides whether the wrapped map can be scheduled as an additional CUDA
// dimension below its parent map.
//
// Returns true only if all of the following hold:
//  - the configured block size is non-zero,
//  - the map currently has the sequential schedule type,
//  - the loop analysis reports a parent loop that is a Map with the CUDA
//    schedule type and whose CUDA dimension is not Z,
//  - no atom of the map's condition equals the induction variable of any
//    enclosing Map,
//  - if the iteration count is a known integer, ceil(iterations / block_size)
//    does not exceed 65535.
//
// `builder` is not used by this check; `analysis_manager` provides the
// LoopAnalysis used to walk the enclosing loops.
bool CUDAParallelizeNestedMap::
    can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();

    // Condition: Block size must be non-zero. The grid size below is derived by
    // dividing through the block size, so zero would be a division by zero.
    if (block_size_ == 0) {
        return false;
    }

    // Condition: Check if map is not yet parallelized with CUDA
    if (loop_.schedule_type().value() != ScheduleType_Sequential::value()) {
        return false;
    }

    // Condition: Check if parent loop exists
    auto parent = loop_analysis.parent_loop(&loop_);
    if (parent == nullptr) {
        return false;
    }

    // Condition: Check if parent loop is a CUDA map, and not Z dimension (final dimension)
    auto parent_map = dynamic_cast<structured_control_flow::Map*>(parent);
    if (parent_map == nullptr) {
        return false;
    }
    if (parent_map->schedule_type().value() != cuda::ScheduleType_CUDA::value()) {
        return false;
    }
    if (cuda::ScheduleType_CUDA::dimension(parent_map->schedule_type()) == cuda::CUDADimension::Z) {
        return false;
    }

    // Condition: The map's condition must not reference the induction variable
    // of any enclosing map. The atoms of the condition do not change while
    // walking the ancestors, so they are collected once up front.
    auto condition_atoms = symbolic::atoms(loop_.condition());
    for (auto ancestor = parent; ancestor != nullptr; ancestor = loop_analysis.parent_loop(ancestor)) {
        auto map_ancestor = dynamic_cast<structured_control_flow::Map*>(ancestor);
        if (map_ancestor == nullptr) {
            // Enclosing loops that are not maps are skipped.
            continue;
        }
        auto ancestor_indvar = map_ancestor->indvar();
        for (auto& atom : condition_atoms) {
            if (symbolic::eq(atom, ancestor_indvar)) {
                return false;
            }
        }
    }

    // Note: arbitrary `init` and `stride` are permitted. The CUDA dispatcher
    // emits `<map.indvar> = init + thread_flat_id * stride`, so the body sees
    // the natural strided value; `num_iterations()` accounts for both when
    // computing the grid geometry.

    // Condition: Resulting CUDA grid dimension must not exceed hardware limits.
    // Y and Z grid dimensions are limited to 65535.
    auto num_iters = loop_.num_iterations();
    if (!num_iters.is_null() && SymEngine::is_a<SymEngine::Integer>(*num_iters)) {
        const int64_t iters = SymEngine::down_cast<const SymEngine::Integer&>(*num_iters).as_int();

        // A non-positive iteration count cannot exceed the limit, so only
        // positive counts are checked.
        if (iters > 0) {
            constexpr uint64_t max_grid_dim_yz = 65535;
            const uint64_t block = static_cast<uint64_t>(block_size_);
            // Ceiling division written as (iters - 1) / block + 1 so that the
            // intermediate value cannot overflow for large iteration counts.
            const uint64_t grid_size = (static_cast<uint64_t>(iters) - 1) / block + 1;
            if (grid_size > max_grid_dim_yz) {
                return false;
            }
        }
    }

    return true;
}

// Switches the wrapped map to a CUDA schedule on the dimension following the
// one used by its parent map (X -> Y, Y -> Z) with the configured block size.
//
// Throws InvalidSDFGException if the loop analysis reports no parent map, if
// the parent map does not have the CUDA schedule type, or if the parent
// already occupies the Z dimension. These are the same structural conditions
// that `can_be_applied` checks, verified here so that a direct call fails with
// an exception instead of dereferencing an invalid parent.
void CUDAParallelizeNestedMap::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {
    auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();

    // Checked downcast: yields nullptr both when there is no parent loop and
    // when the parent loop is not a map.
    auto parent_map = dynamic_cast<structured_control_flow::Map*>(loop_analysis.parent_loop(&loop_));
    if (parent_map == nullptr) {
        throw InvalidSDFGException("Parent loop is not a map, cannot parallelize nested map.");
    }
    if (parent_map->schedule_type().value() != cuda::ScheduleType_CUDA::value()) {
        throw InvalidSDFGException("Parent map is not a CUDA map, cannot parallelize nested map.");
    }

    auto parent_dim = cuda::ScheduleType_CUDA::dimension(parent_map->schedule_type());

    // The nested map takes the dimension after the parent's one.
    cuda::CUDADimension child_dim;
    if (parent_dim == cuda::CUDADimension::X) {
        child_dim = cuda::CUDADimension::Y;
    } else if (parent_dim == cuda::CUDADimension::Y) {
        child_dim = cuda::CUDADimension::Z;
    } else {
        throw InvalidSDFGException("Parent loop is Z dimension, cannot parallelize nested map.");
    }

    auto new_schedule = cuda::ScheduleType_CUDA::create();
    cuda::ScheduleType_CUDA::dimension(new_schedule, child_dim);
    cuda::ScheduleType_CUDA::block_size(new_schedule, symbolic::integer(block_size_));

    builder.update_schedule_type(loop_, new_schedule);
}

// Writes this transformation into `j`:
//   "transformation_type" -> name()
//   "parameters"          -> { "block_size": block_size_ }
//   "subgraph"            -> { "0": <the wrapped map as serialized by JSONSerializer> }
void CUDAParallelizeNestedMap::to_json(nlohmann::json& j) const {
    j["transformation_type"] = this->name();

    auto& parameters = j["parameters"];
    parameters = nlohmann::json::object();
    parameters["block_size"] = block_size_;

    serializer::JSONSerializer flat_serializer(false);

    auto& subgraph = j["subgraph"];
    subgraph = nlohmann::json::object();

    // The wrapped map is stored under the key "0" of the subgraph object.
    auto& node_entry = subgraph["0"];
    node_entry = nlohmann::json::object();
    flat_serializer.serialize_node(node_entry, loop_);
}

// Rebuilds a transformation from the layout produced by `to_json`.
//
// Reads the element id from j["subgraph"]["0"]["element_id"] and the block
// size from j["parameters"]["block_size"]; only these fields are consulted.
// The id is resolved through `builder`, and the element must be a Map.
//
// Throws InvalidTransformationDescriptionException if the resolved element is
// not a Map. Missing keys surface as whatever `nlohmann::json::at` throws.
CUDAParallelizeNestedMap CUDAParallelizeNestedMap::
    from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& j) {
    const size_t element_id = j.at("subgraph").at("0").at("element_id").get<size_t>();
    const size_t parsed_block_size = j.at("parameters").at("block_size").get<size_t>();

    auto element = builder.find_element_by_id(element_id);
    auto map_node = dynamic_cast<structured_control_flow::Map*>(element);
    if (map_node == nullptr) {
        throw InvalidTransformationDescriptionException(
            "Element with ID " + std::to_string(element_id) + " is not a loop."
        );
    }

    return CUDAParallelizeNestedMap(*map_node, parsed_block_size);
}

} // namespace transformations
} // namespace sdfg

1	#include "sdfg/transformations/offloading/cuda_parallelize_nested_map.h"
2
3	#include <sdfg/analysis/loop_analysis.h>
4	#include "sdfg/exceptions.h"
5	#include "sdfg/symbolic/symbolic.h"
6	#include "sdfg/targets/cuda/cuda.h"
7
8	namespace sdfg {
9	namespace transformations {
10
11	CUDAParallelizeNestedMap::CUDAParallelizeNestedMap(structured_control_flow::Map& loop, size_t block_size)
12	: loop_(loop), block_size_(block_size) {}	16✔
13
14	std::string CUDAParallelizeNestedMap::name() const { return "CUDAParallelizeNestedMap"; }	3✔
15
16	bool CUDAParallelizeNestedMap::
17	can_be_applied(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {	12✔
18	auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();	12✔
19
20	// Condition: Check if map is not yet parallelized with CUDA
21	if (loop_.schedule_type().value() != ScheduleType_Sequential::value()) {	12✔
22	return false;	1✔
23	}	1✔
24
25	// Condition: Check if parent loop exists
26	auto parent = loop_analysis.parent_loop(&loop_);	11✔
27	if (parent == nullptr) {	11✔
28	return false;	1✔
29	}	1✔
30
31	// Condition: Check if parent loop is a CUDA map, and not Z dimension (final dimension)
32	if (auto map = dynamic_cast<structured_control_flow::Map*>(parent)) {	10✔
33	if (map->schedule_type().value() != cuda::ScheduleType_CUDA::value()) {	10✔
34	return false;	1✔
35	}	1✔
36	if (cuda::ScheduleType_CUDA::dimension(map->schedule_type()) == cuda::CUDADimension::Z) {	9✔
37	return false;	1✔
38	}	1✔
39	auto parent_indvar = map->indvar();	8✔
40	auto ancestor = parent;	8✔
41	while (ancestor) {	18✔
42	if (auto map_ancestor = dynamic_cast<structured_control_flow::Map*>(ancestor)) {	10✔
43	parent_indvar = map_ancestor->indvar();	9✔
44	for (auto& arg : symbolic::atoms(loop_.condition())) {	11✔
45	if (symbolic::eq(arg, parent_indvar)) {	11✔
46	return false;	×
47	}	×
48	}	11✔
49	}	9✔
50	ancestor = loop_analysis.parent_loop(ancestor);	10✔
51	}	10✔
52	} else {	8✔
53	return false;	×
54	}	×
55
56	// Note: arbitrary `init` and `stride` are permitted. The CUDA dispatcher
57	// emits `<map.indvar> = init + thread_flat_id * stride`, so the body sees
58	// the natural strided value; `num_iterations()` accounts for both when
59	// computing the grid geometry.
60
61	// Condition: Resulting CUDA grid dimension must not exceed hardware limits.
62	// Y and Z grid dimensions are limited to 65535.
63	auto num_iters = loop_.num_iterations();	8✔
64	if (!num_iters.is_null() && SymEngine::is_a<SymEngine::Integer>(*num_iters)) {	8✔
65	int64_t iters = SymEngine::down_cast<const SymEngine::Integer&>(*num_iters).as_int();	6✔
66	int64_t block = static_cast<int64_t>(block_size_);	6✔
67	int64_t grid_size = (iters + block - 1) / block;	6✔
68
69	constexpr int64_t max_grid_dim_yz = 65535;	6✔
70	if (grid_size > max_grid_dim_yz) {	6✔
71	return false;	1✔
72	}	1✔
73	}	6✔
74
75	return true;	7✔
76	}	8✔
77
78	void CUDAParallelizeNestedMap::apply(builder::StructuredSDFGBuilder& builder, analysis::AnalysisManager& analysis_manager) {	5✔
79	auto& loop_analysis = analysis_manager.get<analysis::LoopAnalysis>();	5✔
80	auto parent = loop_analysis.parent_loop(&loop_);	5✔
81
82	auto parent_dim =	5✔
83	cuda::ScheduleType_CUDA::dimension(static_cast<structured_control_flow::Map*>(parent)->schedule_type());	5✔
84
85	cuda::CUDADimension child_dim;	5✔
86	if (parent_dim == cuda::CUDADimension::X) {	5✔
87	child_dim = cuda::CUDADimension::Y;	4✔
88	} else if (parent_dim == cuda::CUDADimension::Y) {	4✔
89	child_dim = cuda::CUDADimension::Z;	1✔
90	} else {	1✔
91	throw InvalidSDFGException("Parent loop is Z dimension, cannot parallelize nested map.");	×
92	}	×
93
94	auto new_schedule = cuda::ScheduleType_CUDA::create();	5✔
95	cuda::ScheduleType_CUDA::dimension(new_schedule, child_dim);	5✔
96	cuda::ScheduleType_CUDA::block_size(new_schedule, symbolic::integer(block_size_));	5✔
97
98	builder.update_schedule_type(loop_, new_schedule);	5✔
99	}	5✔
100
101	void CUDAParallelizeNestedMap::to_json(nlohmann::json& j) const {	1✔
102	j["transformation_type"] = this->name();	1✔
103	j["parameters"] = nlohmann::json::object();	1✔
104	j["parameters"]["block_size"] = block_size_;	1✔
105
106	serializer::JSONSerializer ser_flat(false);	1✔
107	j["subgraph"] = nlohmann::json::object();	1✔
108	j["subgraph"]["0"] = nlohmann::json::object();	1✔
109	ser_flat.serialize_node(j["subgraph"]["0"], loop_);	1✔
110	}	1✔
111
112	CUDAParallelizeNestedMap CUDAParallelizeNestedMap::
113	from_json(builder::StructuredSDFGBuilder& builder, const nlohmann::json& j) {	1✔
114	// Prefer the embedding-compatible representation (subgraph/parameters),
115	// but fall back to legacy fields (loop/block_size) if needed.
116	const auto& subgraph = j.at("subgraph");	1✔
117	const auto& node_desc = subgraph.at("0");	1✔
118	size_t loop_id = node_desc.at("element_id").get<size_t>();	1✔
119
120	size_t block_size = j.at("parameters").at("block_size").get<size_t>();	1✔
121	auto loop = dynamic_cast<structured_control_flow::Map*>(builder.find_element_by_id(loop_id));	1✔
122	if (!loop) {	1✔
UNCOV 123	throw InvalidTransformationDescriptionException("Element with ID " + std::to_string(loop_id) + " is not a loop.");	×
UNCOV 124	}	×
125	return CUDAParallelizeNestedMap(*loop, block_size);	1✔
126	}	1✔
127
128	} // namespace transformations
129	} // namespace sdfg

daisytuner / docc / 28106147644

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous