celerity / celerity-runtime, build 9207778767

23 May 2024 12:15PM UTC, coverage: 94.753% (+0.009% from 94.744%)

Pull Request #253: [IDAG] Out-of-Order Engine for Execution Scheduling
Merge 2fa06dcaa into d91cfea1f (via github, committer web-flow)

3062 of 3417 branches covered (89.61%); branch coverage is included in the aggregate percentage.
183 of 188 new or added lines in 4 files covered (97.34%).
1 existing line in 1 file is now uncovered.
7050 of 7255 relevant lines covered (97.17%).
215263.29 hits per line.

Source File: /src/task.cc (87.58%)
#include "task.h"

#include "access_modes.h"
#include "utils.h"


namespace celerity {
namespace detail {

        std::unordered_set<buffer_id> buffer_access_map::get_accessed_buffers() const {
                std::unordered_set<buffer_id> result;
                for(const auto& [bid, _] : m_accesses) {
                        result.emplace(bid);
                }
                return result;
        }

        std::unordered_set<cl::sycl::access::mode> buffer_access_map::get_access_modes(buffer_id bid) const {
                std::unordered_set<cl::sycl::access::mode> result;
                for(const auto& [b, rm] : m_accesses) {
                        if(b == bid) { result.insert(rm->get_access_mode()); }
                }
                return result;
        }

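        // Usage sketch (hypothetical, not part of this file): the two getters
        // above form the basic query surface of buffer_access_map, and one
        // might combine them to enumerate every (buffer, mode) pair a task
        // records. Assumes buffer_id prints as an integer and that fmt is
        // available (the file already uses fmt further down).
        inline void dump_buffer_accesses(const buffer_access_map& bam) {
                for(const buffer_id bid : bam.get_accessed_buffers()) {
                        for(const auto mode : bam.get_access_modes(bid)) {
                                fmt::print("buffer {} accessed with mode {}\n", bid, static_cast<int>(mode));
                        }
                }
        }
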
        template <int KernelDims>
        subrange<3> apply_range_mapper(const range_mapper_base* rm, const chunk<KernelDims>& chnk) {
                switch(rm->get_buffer_dimensions()) {
                case 0: return subrange_cast<3>(subrange<0>());
                case 1: return subrange_cast<3>(rm->map_1(chnk));
                case 2: return subrange_cast<3>(rm->map_2(chnk));
                case 3: return rm->map_3(chnk);
                default: assert(false); return subrange<3>{};
                }
        }

        subrange<3> apply_range_mapper(const range_mapper_base* rm, const chunk<3>& chnk, int kernel_dims) {
                switch(kernel_dims) {
                case 0: return apply_range_mapper<0>(rm, chunk_cast<0>(chnk));
                case 1: return apply_range_mapper<1>(rm, chunk_cast<1>(chnk));
                case 2: return apply_range_mapper<2>(rm, chunk_cast<2>(chnk));
                case 3: return apply_range_mapper<3>(rm, chunk_cast<3>(chnk));
                default: assert(!"Unreachable"); return subrange<3>{};
                }
        }

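        // The two overloads above implement a runtime-to-compile-time
        // dimensionality dispatch: the untemplated overload switches on
        // kernel_dims and forwards to the templated one, which in turn
        // switches on the buffer dimensionality. (The dispatcher's
        // default: assert(!"Unreachable") branch is the one existing line
        // this build reports as newly uncovered.) Below is a minimal
        // self-contained sketch of the same technique, with hypothetical
        // names; assumes <cassert> is available.
        template <int Dims>
        int volume(const int (&r)[3]) {
                int v = 1;
                for(int d = 0; d < Dims; ++d) { v *= r[d]; }
                return v;
        }

        inline int volume_dynamic(const int (&r)[3], const int dims) {
                switch(dims) {
                case 1: return volume<1>(r);
                case 2: return volume<2>(r);
                case 3: return volume<3>(r);
                default: assert(!"unreachable"); return 0;
                }
        }
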
        region<3> buffer_access_map::get_mode_requirements(
            const buffer_id bid, const access_mode mode, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const {
                box_vector<3> boxes;
                for(size_t i = 0; i < m_accesses.size(); ++i) {
                        if(m_accesses[i].first != bid || m_accesses[i].second->get_access_mode() != mode) continue;
                        boxes.push_back(get_requirements_for_nth_access(i, kernel_dims, sr, global_size));
                }
                return region(std::move(boxes));
        }

        box<3> buffer_access_map::get_requirements_for_nth_access(const size_t n, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const {
                return apply_range_mapper(m_accesses[n].second.get(), chunk<3>{sr.offset, sr.range, global_size}, kernel_dims);
        }

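        // Usage sketch (hypothetical, not from this file): the region of
        // buffer bid that a chunk sr of task tsk would read, computed via
        // get_mode_requirements above. Assumes the task's access map was
        // populated at task-generation time.
        inline region<3> read_requirements(const task& tsk, const buffer_id bid, const subrange<3>& sr) {
                return tsk.get_buffer_access_map().get_mode_requirements(bid, access_mode::read, tsk.get_dimensions(), sr, tsk.get_global_size());
        }
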
        box_vector<3> buffer_access_map::get_required_contiguous_boxes(
            const buffer_id bid, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const {
                box_vector<3> boxes;
                for(const auto& [a_bid, a_rm] : m_accesses) {
                        if(a_bid == bid) {
                                const auto accessed_box = box(apply_range_mapper(a_rm.get(), chunk<3>{sr.offset, sr.range, global_size}, kernel_dims));
                                if(!accessed_box.empty()) { boxes.push_back(accessed_box); }
                        }
                }
                return boxes;
        }

        void side_effect_map::add_side_effect(const host_object_id hoid, const experimental::side_effect_order order) {
                // TODO for multiple side effects on the same hoid, find the weakest order satisfying all of them
                emplace(hoid, order);
        }

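        // Sketch of the merge the TODO above calls for (hypothetical, not
        // part of this file; assumes side_effect_order values are comparable
        // and ordered weakest-to-strongest, and that <algorithm> is
        // available): on a repeated hoid, keep the stronger of the two
        // orders, i.e. the weakest single order that satisfies both requests.
        inline void add_side_effect_merged(std::unordered_map<host_object_id, experimental::side_effect_order>& m,
            const host_object_id hoid, const experimental::side_effect_order order) {
                const auto [it, inserted] = m.emplace(hoid, order);
                if(!inserted) { it->second = std::max(it->second, order); }
        }
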
        std::string print_task_debug_label(const task& tsk, bool title_case) {
                const auto type_string = [&] {
                        switch(tsk.get_type()) {
                        case task_type::epoch: return "epoch";
                        case task_type::host_compute: return "host-compute task";
                        case task_type::device_compute: return "device kernel";
                        case task_type::collective: return "collective host task";
                        case task_type::master_node: return "master-node host task";
                        case task_type::horizon: return "horizon";
                        case task_type::fence: return "fence";
                        default: return "unknown task";
                        }
                }();

                auto label = fmt::format("{} T{}", type_string, tsk.get_id());
                if(title_case) { label[0] = static_cast<char>(std::toupper(label[0])); }
                if(!tsk.get_debug_name().empty()) { fmt::format_to(std::back_inserter(label), " \"{}\"", tsk.get_debug_name()); }
                return label;
        }

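        // Illustrative outputs of print_task_debug_label (hypothetical task:
        // device-compute, id 42, debug name "matmul"):
        //   print_task_debug_label(tsk, false) -> device kernel T42 "matmul"
        //   print_task_debug_label(tsk, true)  -> Device kernel T42 "matmul"
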
        std::unordered_map<buffer_id, region<3>> detect_overlapping_writes(const task& tsk, const box_vector<3>& chunks) {
                const box<3> scalar_reduction_box({0, 0, 0}, {1, 1, 1});

                auto& bam = tsk.get_buffer_access_map();

                // track the union of writes we have checked so far in order to detect an overlap between that union and the next write
                std::unordered_map<buffer_id, region<3>> buffer_write_accumulators;
                // collect overlapping writes in order to report all of them before throwing
                std::unordered_map<buffer_id, region<3>> overlapping_writes;

                for(const auto bid : bam.get_accessed_buffers()) {
                        for(const auto& ck : chunks) {
                                region<3> writes;
                                for(const auto mode : bam.get_access_modes(bid)) {
                                        if(access::mode_traits::is_producer(mode)) {
                                                const auto req = bam.get_mode_requirements(bid, mode, tsk.get_dimensions(), ck.get_subrange(), tsk.get_global_size());
                                                writes = region_union(writes, req);
                                        }
                                }
                                if(!writes.empty()) {
                                        auto& write_accumulator = buffer_write_accumulators[bid]; // allow default-insert
                                        if(const auto overlap = region_intersection(write_accumulator, writes); !overlap.empty()) {
                                                auto& full_overlap = overlapping_writes[bid]; // allow default-insert
                                                full_overlap = region_union(full_overlap, overlap);
                                        }
                                        write_accumulator = region_union(write_accumulator, writes);
                                }
                        }
                }

                // we already check for accessor-reduction overlaps on task generation, but we still repeat the sanity-check here
                for(const auto& rinfo : tsk.get_reductions()) {
                        auto& write_accumulator = buffer_write_accumulators[rinfo.bid]; // allow default-insert
                        if(const auto overlap = region_intersection(write_accumulator, scalar_reduction_box); !overlap.empty()) {
                                auto& full_overlap = overlapping_writes[rinfo.bid]; // allow default-insert
                                full_overlap = region_union(full_overlap, overlap);
                        }
                        write_accumulator = region_union(write_accumulator, scalar_reduction_box);
                }

                return overlapping_writes;
        }

} // namespace detail
} // namespace celerity
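
detect_overlapping_writes folds each chunk's producer region into a per-buffer accumulator and records any intersection between a new write and what was accumulated before it. The same detect-then-accumulate idea reduces to one dimension; what follows is a minimal self-contained sketch of that pattern, with half-open intervals standing in for 3D regions (all names here are hypothetical, not from the celerity API):

#include <algorithm>
#include <utility>
#include <vector>

using interval = std::pair<int, int>; // half-open [first, second)

// Fold each write interval into a running list and report every
// intersection with a previously seen interval. The real code keeps a
// region union per buffer instead of a flat list, but the order of
// operations (intersect first, then accumulate) is the same.
std::vector<interval> detect_overlaps_1d(const std::vector<interval>& writes) {
        std::vector<interval> accumulated;
        std::vector<interval> overlaps;
        for(const auto& w : writes) {
                for(const auto& a : accumulated) {
                        const int lo = std::max(a.first, w.first);
                        const int hi = std::min(a.second, w.second);
                        if(lo < hi) { overlaps.push_back({lo, hi}); }
                }
                accumulated.push_back(w);
        }
        return overlaps;
}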