celerity / celerity-runtime / 12047884020 (push, GitHub)

27 Nov 2024 09:58AM UTC coverage: 94.956% (-0.02%) from 94.972%

fknorr
Do not disable CGF diagnostics in test_utils::add_*_task

This eliminates dead code from an earlier incomplete refactoring.

The CGF teardown / reinit was only required by a single test, which was
coincidentally also broken and did not test the advertised feature. This
commit splits that test between runtime_ and runtime_deprecation_tests
and also moves runtime-independent sibling tests to task_graph_tests.

3202 of 3633 branches covered (88.14%)

Branch coverage included in aggregate %.

7151 of 7270 relevant lines covered (98.36%)

1224689.38 hits per line
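For reference, the summary percentages follow directly from these counts: 3202 / 3633 ≈ 88.14% of branches and 7151 / 7270 ≈ 98.36% of relevant lines.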

Source File

/src/task.cc (92.89% coverage)
#include "task.h"

#include "access_modes.h"
#include "cgf.h"
#include "grid.h"
#include "range_mapper.h"
#include "ranges.h"
#include "types.h"
#include "utils.h"

#include <cstddef>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>


namespace celerity::detail {

template <int KernelDims>
region<3> apply_range_mapper(const range_mapper_base* rm, const chunk<KernelDims>& chnk) {
        switch(rm->get_buffer_dimensions()) {
        case 0: return region_cast<3>(region(box<0>()));
        case 1: return region_cast<3>(rm->map_1(chnk));
        case 2: return region_cast<3>(rm->map_2(chnk));
        case 3: return rm->map_3(chnk);
        default: utils::unreachable(); // LCOV_EXCL_LINE
        }
}

region<3> apply_range_mapper(const range_mapper_base* rm, const chunk<3>& chnk, int kernel_dims) {
        switch(kernel_dims) {
        case 0: return apply_range_mapper<0>(rm, chunk_cast<0>(chnk));
        case 1: return apply_range_mapper<1>(rm, chunk_cast<1>(chnk));
        case 2: return apply_range_mapper<2>(rm, chunk_cast<2>(chnk));
        case 3: return apply_range_mapper<3>(rm, chunk_cast<3>(chnk));
        default: utils::unreachable(); // LCOV_EXCL_LINE
        }
}
}
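// For illustration, a hypothetical example: applying a celerity::access::one_to_one range mapper to the
// 1-dimensional chunk {offset = 0, range = 128, global_size = 128} of a 1D kernel reading a 1D buffer goes
// through apply_range_mapper<1>, takes the `case 1` branch of the buffer-dimension switch, and yields the
// buffer region [0,128) cast up to three dimensions.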

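// Precomputes, for each accessed buffer, the union of consumed and produced regions over the task's full
// execution range by applying every access's range mapper to a single chunk spanning the whole task geometry.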
buffer_access_map::buffer_access_map(std::vector<buffer_access>&& accesses, const task_geometry& geometry)
    : m_accesses(std::move(accesses)), m_task_geometry(geometry) {
        std::unordered_map<buffer_id, region_builder<3>> consumed_regions;
        std::unordered_map<buffer_id, region_builder<3>> produced_regions;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [bid, mode, rm] = m_accesses[i];
                m_accessed_buffers.insert(bid);
                const auto req = apply_range_mapper(rm.get(), chunk<3>{geometry.global_offset, geometry.global_size, geometry.global_size}, geometry.dimensions);
                auto& cons = consumed_regions[bid]; // allow default-insert
                auto& prod = produced_regions[bid]; // allow default-insert
                if(access::mode_traits::is_consumer(mode)) { cons.add(req); }
                if(access::mode_traits::is_producer(mode)) { prod.add(req); }
        }
        for(auto& [bid, builder] : consumed_regions) {
                m_task_consumed_regions.emplace(bid, std::move(builder).into_region());
        }
        for(auto& [bid, builder] : produced_regions) {
                m_task_produced_regions.emplace(bid, std::move(builder).into_region());
        }
}

region<3> buffer_access_map::get_requirements_for_nth_access(const size_t n, const box<3>& execution_range) const {
        const auto sr = execution_range.get_subrange();
        return apply_range_mapper(m_accesses[n].range_mapper.get(), chunk<3>{sr.offset, sr.range, m_task_geometry.global_size}, m_task_geometry.dimensions);
}

region<3> buffer_access_map::compute_consumed_region(const buffer_id bid, const box<3>& execution_range) const {
        region_builder<3> builder;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [b, m, _] = m_accesses[i];
                if(b != bid || !access::mode_traits::is_consumer(m)) continue;
                builder.add(get_requirements_for_nth_access(i, execution_range));
        }
        return std::move(builder).into_region();
}

region<3> buffer_access_map::compute_produced_region(const buffer_id bid, const box<3>& execution_range) const {
        region_builder<3> builder;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [b, m, _] = m_accesses[i];
                if(b != bid || !access::mode_traits::is_producer(m)) continue;
                builder.add(get_requirements_for_nth_access(i, execution_range));
        }
        return std::move(builder).into_region();
}

box_vector<3> buffer_access_map::compute_required_contiguous_boxes(const buffer_id bid, const box<3>& execution_range) const {
        box_vector<3> boxes;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [b, a_mode, _] = m_accesses[i];
                if(b == bid) {
                        const auto accessed_region = get_requirements_for_nth_access(i, execution_range);
                        if(!accessed_region.empty()) { boxes.push_back(bounding_box(accessed_region)); }
                }
        }
        return boxes;
}

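// Builds the concrete task object for a raw command group, dispatching on the recorded task type:
// host_compute and device_compute use the geometry stored in the raw_command_group, collective tasks derive
// a 1D geometry from num_collective_nodes, and master-node tasks run with an empty geometry.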
std::unique_ptr<detail::task> make_command_group_task(const detail::task_id tid, const size_t num_collective_nodes, raw_command_group&& cg) {
        std::unique_ptr<detail::task> task;
        switch(cg.task_type.value()) {
        case detail::task_type::host_compute: {
                assert(!cg.collective_group_id.has_value());
                const auto& geometry = cg.geometry.value();
                if(geometry.global_size.size() == 0) {
                        // TODO this can be easily supported by not creating a task in case the execution range is empty
                        throw std::runtime_error{"The execution range of distributed host tasks must have at least one item"};
                }
                auto& launcher = std::get<detail::host_task_launcher>(cg.launcher.value());
                buffer_access_map bam(std::move(cg.buffer_accesses), geometry);
                side_effect_map sem(cg.side_effects);
                task = detail::task::make_host_compute(tid, geometry, std::move(launcher), std::move(bam), std::move(sem), std::move(cg.reductions));
                break;
        }
        case detail::task_type::device_compute: {
                assert(!cg.collective_group_id.has_value());
                const auto& geometry = cg.geometry.value();
                if(geometry.global_size.size() == 0) {
                        // TODO unless reductions are involved, this can be easily supported by not creating a task in case the execution range is empty.
                        // Edge case: If the task includes reductions that specify property::reduction::initialize_to_identity, we need to create a task that sets
                        // the buffer state to an empty pending_reduction_state in the graph_generator. This will cause a trivial reduction_command to be generated on
                        // each node that reads from the reduction output buffer, initializing it to the identity value locally.
                        throw std::runtime_error{"The execution range of device tasks must have at least one item"};
                }
                auto& launcher = std::get<detail::device_kernel_launcher>(cg.launcher.value());
                buffer_access_map bam(std::move(cg.buffer_accesses), geometry);
                // Note that cgf_diagnostics has a similar check, but we don't catch void side effects there.
                if(!cg.side_effects.empty()) { throw std::runtime_error{"Side effects cannot be used in device kernels"}; }
                task = detail::task::make_device_compute(tid, geometry, std::move(launcher), std::move(bam), std::move(cg.reductions));
                break;
        }
        case detail::task_type::collective: {
                assert(!cg.geometry.has_value());
                const task_geometry geometry{// geometry is dependent on num_collective_nodes, so it is not set in raw_command_group
                    .dimensions = 1,
                    .global_size = detail::range_cast<3>(range(num_collective_nodes)),
                    .global_offset = zeros,
                    .granularity = ones};
                const auto cgid = cg.collective_group_id.value();
                auto& launcher = std::get<detail::host_task_launcher>(cg.launcher.value());
                buffer_access_map bam(std::move(cg.buffer_accesses), geometry);
                side_effect_map sem(cg.side_effects);
                assert(cg.reductions.empty());
                task = detail::task::make_collective(tid, geometry, cgid, num_collective_nodes, std::move(launcher), std::move(bam), std::move(sem));
                break;
        }
        case detail::task_type::master_node: {
                assert(!cg.collective_group_id.has_value());
                assert(!cg.geometry.has_value());
                auto& launcher = std::get<detail::host_task_launcher>(cg.launcher.value());
                buffer_access_map bam(std::move(cg.buffer_accesses), task_geometry{});
                side_effect_map sem(cg.side_effects);
                assert(cg.reductions.empty());
                task = detail::task::make_master_node(tid, std::move(launcher), std::move(bam), std::move(sem));
                break;
        }
        case detail::task_type::horizon:
        case detail::task_type::fence:
        case detail::task_type::epoch: //
                detail::utils::unreachable();
        }
        for(auto& h : cg.hints) {
                task->add_hint(std::move(h));
        }
        if(cg.task_name.has_value()) { task->set_debug_name(*cg.task_name); }
        return task;
}

} // namespace celerity::detail

namespace celerity {
namespace detail {

        std::string print_task_debug_label(const task& tsk, bool title_case) {
                return utils::make_task_debug_label(tsk.get_type(), tsk.get_id(), tsk.get_debug_name(), title_case);
        }

        std::unordered_map<buffer_id, region<3>> detect_overlapping_writes(const task& tsk, const box_vector<3>& chunks) {
                const box<3> scalar_reduction_box({0, 0, 0}, {1, 1, 1});

                auto& bam = tsk.get_buffer_access_map();

                // track the union of writes we have checked so far in order to detect an overlap between that union and the next write
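                // (for example: if one chunk writes buffer region [0,64) and a later chunk writes [32,96), the
                // accumulated union becomes [0,96) and the intersection [32,64) is reported as an overlapping write)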
                std::unordered_map<buffer_id, region<3>> buffer_write_accumulators;
                // collect overlapping writes in order to report all of them before throwing
                std::unordered_map<buffer_id, region<3>> overlapping_writes;

                for(const auto bid : bam.get_accessed_buffers()) {
                        for(const auto& ck : chunks) {
                                const auto writes = bam.compute_produced_region(bid, ck.get_subrange());
                                if(!writes.empty()) {
                                        auto& write_accumulator = buffer_write_accumulators[bid]; // allow default-insert
                                        if(const auto overlap = region_intersection(write_accumulator, writes); !overlap.empty()) {
                                                auto& full_overlap = overlapping_writes[bid]; // allow default-insert
                                                full_overlap = region_union(full_overlap, overlap);
                                        }
                                        write_accumulator = region_union(write_accumulator, writes);
                                }
                        }
                }

                // we already check for accessor-reduction overlaps on task generation, but we still repeat the sanity-check here
                for(const auto& rinfo : tsk.get_reductions()) {
                        auto& write_accumulator = buffer_write_accumulators[rinfo.bid]; // allow default-insert
                        if(const auto overlap = region_intersection(write_accumulator, scalar_reduction_box); !overlap.empty()) {
                                auto& full_overlap = overlapping_writes[rinfo.bid]; // allow default-insert
                                full_overlap = region_union(full_overlap, overlap);
                        }
                        write_accumulator = region_union(write_accumulator, scalar_reduction_box);
                }

                return overlapping_writes;
        }

} // namespace detail
} // namespace celerity