celerity / celerity-runtime · build 12009901531 (push, via GitHub)
25 Nov 2024 12:20PM UTC · coverage: 94.92% (+0.009% from 94.911%)

Commit by fknorr: Add missing includes and consistently order them

We can't add the misc-include-cleaner lint because it causes too many
false positives with "interface headers" such as sycl.hpp.
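
To illustrate the false-positive pattern the message refers to, here is a rough, self-contained sketch with made-up names (api.hpp, detail/widget_impl.hpp and lib::widget are hypothetical, not SYCL's actual layout): include-cleaner style checks attribute each symbol to the header that declares it, so code that correctly includes only a public interface header can still be flagged.

// everything below mocks a three-file situation in one translation unit, for illustration only

// --- detail/widget_impl.hpp: the header that actually declares the symbol ---
namespace lib { struct widget { int value = 0; }; }

// --- api.hpp: the public "interface header" users are meant to include ---
// (it would simply do: #include "detail/widget_impl.hpp")

// --- user code: includes only api.hpp ---
int use_widget() {
        lib::widget w;  // the check attributes the declaration to detail/widget_impl.hpp
        return w.value; // and flags the include of api.hpp alone as insufficient
}

Headers like sycl.hpp play the api.hpp role here, which is why the check produces noise on otherwise correct includes.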

3190 of 3626 branches covered (87.98%)
7049 of 7161 relevant lines covered (98.44%)
1242183.17 hits per line
Branch coverage is included in the aggregate coverage percentage.
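Concretely, the aggregate combines lines and branches: (7049 + 3190) covered out of (7161 + 3626) total gives 10239 / 10787 ≈ 94.92%, matching the figure reported above.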

Source file: /src/task.cc (95.8% covered)

#include "task.h"

#include "access_modes.h"
#include "grid.h"
#include "range_mapper.h"
#include "ranges.h"
#include "types.h"
#include "utils.h"

#include <cstddef>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>


namespace celerity::detail {

template <int KernelDims>
region<3> apply_range_mapper(const range_mapper_base* rm, const chunk<KernelDims>& chnk) {
        switch(rm->get_buffer_dimensions()) {
        case 0: return region_cast<3>(region(box<0>()));
        case 1: return region_cast<3>(rm->map_1(chnk));
        case 2: return region_cast<3>(rm->map_2(chnk));
        case 3: return rm->map_3(chnk);
        default: utils::unreachable(); // LCOV_EXCL_LINE
        }
}

region<3> apply_range_mapper(const range_mapper_base* rm, const chunk<3>& chnk, int kernel_dims) {
        switch(kernel_dims) {
        case 0: return apply_range_mapper<0>(rm, chunk_cast<0>(chnk));
        case 1: return apply_range_mapper<1>(rm, chunk_cast<1>(chnk));
        case 2: return apply_range_mapper<2>(rm, chunk_cast<2>(chnk));
        case 3: return apply_range_mapper<3>(rm, chunk_cast<3>(chnk));
        default: utils::unreachable(); // LCOV_EXCL_LINE
        }
}

buffer_access_map::buffer_access_map(std::vector<buffer_access>&& accesses, const task_geometry& geometry)
    : m_accesses(std::move(accesses)), m_task_geometry(geometry) {
        std::unordered_map<buffer_id, region_builder<3>> consumed_regions;
        std::unordered_map<buffer_id, region_builder<3>> produced_regions;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [bid, mode, rm] = m_accesses[i];
                m_accessed_buffers.insert(bid);
                const auto req = apply_range_mapper(rm.get(), chunk<3>{geometry.global_offset, geometry.global_size, geometry.global_size}, geometry.dimensions);
                auto& cons = consumed_regions[bid]; // allow default-insert
                auto& prod = produced_regions[bid]; // allow default-insert
                if(access::mode_traits::is_consumer(mode)) { cons.add(req); }
                if(access::mode_traits::is_producer(mode)) { prod.add(req); }
        }
        for(auto& [bid, builder] : consumed_regions) {
                m_task_consumed_regions.emplace(bid, std::move(builder).into_region());
        }
        for(auto& [bid, builder] : produced_regions) {
                m_task_produced_regions.emplace(bid, std::move(builder).into_region());
        }
}

region<3> buffer_access_map::get_requirements_for_nth_access(const size_t n, const box<3>& execution_range) const {
        const auto sr = execution_range.get_subrange();
        return apply_range_mapper(m_accesses[n].range_mapper.get(), chunk<3>{sr.offset, sr.range, m_task_geometry.global_size}, m_task_geometry.dimensions);
}

region<3> buffer_access_map::compute_consumed_region(const buffer_id bid, const box<3>& execution_range) const {
        region_builder<3> builder;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [b, m, _] = m_accesses[i];
                if(b != bid || !access::mode_traits::is_consumer(m)) continue;
                builder.add(get_requirements_for_nth_access(i, execution_range));
        }
        return std::move(builder).into_region();
}

region<3> buffer_access_map::compute_produced_region(const buffer_id bid, const box<3>& execution_range) const {
        region_builder<3> builder;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [b, m, _] = m_accesses[i];
                if(b != bid || !access::mode_traits::is_producer(m)) continue;
                builder.add(get_requirements_for_nth_access(i, execution_range));
        }
        return std::move(builder).into_region();
}

box_vector<3> buffer_access_map::compute_required_contiguous_boxes(const buffer_id bid, const box<3>& execution_range) const {
        box_vector<3> boxes;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [b, a_mode, _] = m_accesses[i];
                if(b == bid) {
                        const auto accessed_region = get_requirements_for_nth_access(i, execution_range);
                        if(!accessed_region.empty()) { boxes.push_back(bounding_box(accessed_region)); }
                }
        }
        return boxes;
}

} // namespace celerity::detail

namespace celerity {
namespace detail {

        void side_effect_map::add_side_effect(const host_object_id hoid, const experimental::side_effect_order order) {
                // TODO for multiple side effects on the same hoid, find the weakest order satisfying all of them
                emplace(hoid, order);
        }

        std::string print_task_debug_label(const task& tsk, bool title_case) {
                return utils::make_task_debug_label(tsk.get_type(), tsk.get_id(), tsk.get_debug_name(), title_case);
        }

        std::unordered_map<buffer_id, region<3>> detect_overlapping_writes(const task& tsk, const box_vector<3>& chunks) {
                const box<3> scalar_reduction_box({0, 0, 0}, {1, 1, 1});

                auto& bam = tsk.get_buffer_access_map();

                // track the union of writes we have checked so far in order to detect an overlap between that union and the next write
                std::unordered_map<buffer_id, region<3>> buffer_write_accumulators;
                // collect overlapping writes in order to report all of them before throwing
                std::unordered_map<buffer_id, region<3>> overlapping_writes;

                for(const auto bid : bam.get_accessed_buffers()) {
                        for(const auto& ck : chunks) {
                                const auto writes = bam.compute_produced_region(bid, ck.get_subrange());
                                if(!writes.empty()) {
                                        auto& write_accumulator = buffer_write_accumulators[bid]; // allow default-insert
                                        if(const auto overlap = region_intersection(write_accumulator, writes); !overlap.empty()) {
                                                auto& full_overlap = overlapping_writes[bid]; // allow default-insert
                                                full_overlap = region_union(full_overlap, overlap);
                                        }
                                        write_accumulator = region_union(write_accumulator, writes);
                                }
                        }
                }

                // we already check for accessor-reduction overlaps on task generation, but we still repeat the sanity-check here
                for(const auto& rinfo : tsk.get_reductions()) {
                        auto& write_accumulator = buffer_write_accumulators[rinfo.bid]; // allow default-insert
                        if(const auto overlap = region_intersection(write_accumulator, scalar_reduction_box); !overlap.empty()) {
                                auto& full_overlap = overlapping_writes[rinfo.bid]; // allow default-insert
                                full_overlap = region_union(full_overlap, overlap);
                        }
                        write_accumulator = region_union(write_accumulator, scalar_reduction_box);
                }

                return overlapping_writes;
        }

} // namespace detail
} // namespace celerity
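
To see the accumulate-and-intersect idea behind detect_overlapping_writes in isolation, here is a minimal standalone C++ sketch over 1D half-open intervals; every name in it is hypothetical and none of it is Celerity API.

#include <algorithm>
#include <cstdio>
#include <vector>

// a half-open 1D interval [begin, end), standing in for an n-dimensional region
struct interval {
        int begin, end;
};

// returns every overlap between a write and any write that appeared before it
std::vector<interval> find_overlapping_writes(const std::vector<interval>& writes) {
        std::vector<interval> seen_so_far; // writes checked so far
        std::vector<interval> overlaps;    // collected conflicts, reported together at the end
        for(const auto& w : writes) {
                for(const auto& prev : seen_so_far) {
                        const int b = std::max(w.begin, prev.begin);
                        const int e = std::min(w.end, prev.end);
                        if(b < e) { overlaps.push_back({b, e}); } // record the conflict, keep scanning
                }
                seen_so_far.push_back(w);
        }
        return overlaps;
}

int main() {
        // two "chunks" write [0, 60) and [50, 100): the overlap [50, 60) is reported
        for(const auto& o : find_overlapping_writes({{0, 60}, {50, 100}})) {
                std::printf("overlapping write: [%d, %d)\n", o.begin, o.end);
        }
}

The real implementation follows the same pattern per buffer, but it intersects each chunk's produced region against the running union of all earlier writes and merges every overlap into a single region per buffer, so all conflicts can be reported at once rather than one pair at a time.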