celerity / celerity-runtime · build 11854130628 (push, via GitHub)
Committed by psalz: "Update benchmark results for buffer_access_map refactor"
15 Nov 2024 09:58AM UTC · coverage: 95.102% (-0.06%) from 95.163%

2992 of 3394 branches covered (88.16%); branch coverage is included in the aggregate %.
6677 of 6773 relevant lines covered (98.58%)
1294452.81 hits per line
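Since branch coverage is folded into the aggregate, the 95.102% figure follows from pooling lines and branches: (6677 + 2992) covered out of (6773 + 3394) relevant, i.e. 9669 / 10167 ≈ 95.102%.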

Source file: /src/task.cc (95.8% covered)
#include "task.h"

#include <cstddef>
#include <unordered_map>
#include <utility>
#include <vector>

#include "access_modes.h"
#include "grid.h"
#include "range_mapper.h"
#include "ranges.h"
#include "types.h"
#include "utils.h"

namespace celerity::detail {

template <int KernelDims>
region<3> apply_range_mapper(const range_mapper_base* rm, const chunk<KernelDims>& chnk) {
        switch(rm->get_buffer_dimensions()) {
        case 0: return region_cast<3>(region(box<0>()));
        case 1: return region_cast<3>(rm->map_1(chnk));
        case 2: return region_cast<3>(rm->map_2(chnk));
        case 3: return rm->map_3(chnk);
        default: utils::unreachable(); // LCOV_EXCL_LINE
        }
}

region<3> apply_range_mapper(const range_mapper_base* rm, const chunk<3>& chnk, int kernel_dims) {
        switch(kernel_dims) {
        case 0: return apply_range_mapper<0>(rm, chunk_cast<0>(chnk));
        case 1: return apply_range_mapper<1>(rm, chunk_cast<1>(chnk));
        case 2: return apply_range_mapper<2>(rm, chunk_cast<2>(chnk));
        case 3: return apply_range_mapper<3>(rm, chunk_cast<3>(chnk));
        default: utils::unreachable(); // LCOV_EXCL_LINE
        }
}

buffer_access_map::buffer_access_map(std::vector<buffer_access>&& accesses, const task_geometry& geometry)
    : m_accesses(std::move(accesses)), m_task_geometry(geometry) {
        std::unordered_map<buffer_id, region_builder<3>> consumed_regions;
        std::unordered_map<buffer_id, region_builder<3>> produced_regions;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [bid, mode, rm] = m_accesses[i];
                m_accessed_buffers.insert(bid);
                const auto req = apply_range_mapper(rm.get(), chunk<3>{geometry.global_offset, geometry.global_size, geometry.global_size}, geometry.dimensions);
                auto& cons = consumed_regions[bid]; // allow default-insert
                auto& prod = produced_regions[bid]; // allow default-insert
                if(access::mode_traits::is_consumer(mode)) { cons.add(req); }
                if(access::mode_traits::is_producer(mode)) { prod.add(req); }
        }
        for(auto& [bid, builder] : consumed_regions) {
                m_task_consumed_regions.emplace(bid, std::move(builder).into_region());
        }
        for(auto& [bid, builder] : produced_regions) {
                m_task_produced_regions.emplace(bid, std::move(builder).into_region());
        }
}

region<3> buffer_access_map::get_requirements_for_nth_access(const size_t n, const box<3>& execution_range) const {
        const auto sr = execution_range.get_subrange();
        return apply_range_mapper(m_accesses[n].range_mapper.get(), chunk<3>{sr.offset, sr.range, m_task_geometry.global_size}, m_task_geometry.dimensions);
}

region<3> buffer_access_map::compute_consumed_region(const buffer_id bid, const box<3>& execution_range) const {
        region_builder<3> builder;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [b, m, _] = m_accesses[i];
                if(b != bid || !access::mode_traits::is_consumer(m)) continue;
                builder.add(get_requirements_for_nth_access(i, execution_range));
        }
        return std::move(builder).into_region();
}

region<3> buffer_access_map::compute_produced_region(const buffer_id bid, const box<3>& execution_range) const {
        region_builder<3> builder;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [b, m, _] = m_accesses[i];
                if(b != bid || !access::mode_traits::is_producer(m)) continue;
                builder.add(get_requirements_for_nth_access(i, execution_range));
        }
        return std::move(builder).into_region();
}

box_vector<3> buffer_access_map::compute_required_contiguous_boxes(const buffer_id bid, const box<3>& execution_range) const {
        box_vector<3> boxes;
        for(size_t i = 0; i < m_accesses.size(); ++i) {
                const auto& [b, a_mode, _] = m_accesses[i];
                if(b == bid) {
                        const auto accessed_region = get_requirements_for_nth_access(i, execution_range);
                        if(!accessed_region.empty()) { boxes.push_back(bounding_box(accessed_region)); }
                }
        }
        return boxes;
}

} // namespace celerity::detail

namespace celerity {
namespace detail {

        void side_effect_map::add_side_effect(const host_object_id hoid, const experimental::side_effect_order order) {
                // TODO for multiple side effects on the same hoid, find the weakest order satisfying all of them
                emplace(hoid, order);
        }

        std::string print_task_debug_label(const task& tsk, bool title_case) {
                return utils::make_task_debug_label(tsk.get_type(), tsk.get_id(), tsk.get_debug_name(), title_case);
        }

        std::unordered_map<buffer_id, region<3>> detect_overlapping_writes(const task& tsk, const box_vector<3>& chunks) {
                const box<3> scalar_reduction_box({0, 0, 0}, {1, 1, 1});

                auto& bam = tsk.get_buffer_access_map();

                // track the union of writes we have checked so far in order to detect an overlap between that union and the next write
                std::unordered_map<buffer_id, region<3>> buffer_write_accumulators;
                // collect overlapping writes in order to report all of them before throwing
                std::unordered_map<buffer_id, region<3>> overlapping_writes;

                for(const auto bid : bam.get_accessed_buffers()) {
                        for(const auto& ck : chunks) {
                                const auto writes = bam.compute_produced_region(bid, ck.get_subrange());
                                if(!writes.empty()) {
                                        auto& write_accumulator = buffer_write_accumulators[bid]; // allow default-insert
                                        if(const auto overlap = region_intersection(write_accumulator, writes); !overlap.empty()) {
                                                auto& full_overlap = overlapping_writes[bid]; // allow default-insert
                                                full_overlap = region_union(full_overlap, overlap);
                                        }
                                        write_accumulator = region_union(write_accumulator, writes);
                                }
                        }
                }

                // we already check for accessor-reduction overlaps on task generation, but we still repeat the sanity-check here
                for(const auto& rinfo : tsk.get_reductions()) {
                        auto& write_accumulator = buffer_write_accumulators[rinfo.bid]; // allow default-insert
                        if(const auto overlap = region_intersection(write_accumulator, scalar_reduction_box); !overlap.empty()) {
                                auto& full_overlap = overlapping_writes[rinfo.bid]; // allow default-insert
                                full_overlap = region_union(full_overlap, overlap);
                        }
                        write_accumulator = region_union(write_accumulator, scalar_reduction_box);
                }

                return overlapping_writes;
        }

} // namespace detail
} // namespace celerity
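
The core idea behind detect_overlapping_writes above is to keep, per buffer, the union of all writes checked so far and to intersect each new chunk's write set against that union, so every overlap is collected before anything is reported. Below is a minimal standalone sketch of that accumulator pattern, using 1D integer intervals in place of celerity's region<3>; the names interval, intersect and find_overlaps are illustrative only and are not part of the runtime.

// Standalone illustration of the overlap-detection pattern, not celerity code.
#include <algorithm>
#include <cstdio>
#include <vector>

struct interval { int begin, end; }; // half-open [begin, end)

// Intersection of two intervals; an empty result has begin >= end.
interval intersect(const interval a, const interval b) {
        return {std::max(a.begin, b.begin), std::min(a.end, b.end)};
}

// Keep the union of all writes seen so far (here simply a list of intervals)
// and record every new write that intersects it, mirroring the
// buffer_write_accumulators / overlapping_writes logic above.
std::vector<interval> find_overlaps(const std::vector<interval>& writes) {
        std::vector<interval> accumulator; // writes checked so far
        std::vector<interval> overlaps;    // all detected overlaps, collected before reporting
        for(const auto& w : writes) {
                for(const auto& seen : accumulator) {
                        if(const auto o = intersect(seen, w); o.begin < o.end) { overlaps.push_back(o); }
                }
                accumulator.push_back(w);
        }
        return overlaps;
}

int main() {
        // Two chunks writing [0, 60) and [50, 100): the overlap [50, 60) is reported.
        for(const auto& o : find_overlaps({{0, 60}, {50, 100}})) {
                std::printf("overlapping write: [%d, %d)\n", o.begin, o.end);
        }
}

In the real implementation the accumulator and the result are region<3> values keyed by buffer_id, and region_intersection / region_union perform the set algebra; the reduction loop then re-checks the scalar reduction box against the same accumulator.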