
celerity / celerity-runtime · build 10074265948 (push via GitHub)

24 Jul 2024 09:38 AM UTC · coverage: 92.909% (-0.2%) from 93.126%

fknorr: Introduce new backend and executor infrastructure

'executor' has two implementations: live_executor for normal execution and
dry_run_executor for dry runs. The live_executor maintains the state that
persists between instructions, such as memory allocations, but delegates all
state-management complexity to out_of_order_engine and receive_arbiter.
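
To make the split concrete, here is a minimal sketch of what such an interface
could look like. Only the names executor, live_executor, dry_run_executor,
out_of_order_engine and receive_arbiter come from this change; the instruction
type, the submit() member and everything else below are hypothetical
placeholders, not the actual Celerity API.

class instruction; // placeholder for whatever unit of work the executor consumes

class executor {
  public:
        virtual ~executor() = default;
        // hypothetical entry point: hand one instruction to the executor
        virtual void submit(const instruction* instr) = 0;
};

class live_executor final : public executor {
  public:
        // performs the real work and keeps the state that persists between
        // instructions (e.g. memory allocations); ordering would be delegated
        // to an out_of_order_engine, incoming data to a receive_arbiter
        void submit(const instruction* /* instr */) override {}
};

class dry_run_executor final : public executor {
  public:
        // records the submission but never touches device or host memory
        void submit(const instruction* /* instr */) override {}
};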

'backend' is an abstract interface for executing any operation that might touch
backend-allocated memory. The interface itself is independent of SYCL; the
tie-in happens in the sycl_backend derived class. Backend specialization for
CUDA nd-copies is achieved by instantiating either a sycl_generic_backend or a
sycl_cuda_backend.
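
The backend side could be pictured along similar lines; apart from the class
names backend, sycl_backend, sycl_generic_backend and sycl_cuda_backend taken
from the commit message, the member functions below are assumptions made purely
for illustration.

#include <cstddef>

// SYCL-independent interface for anything that touches backend-allocated memory
class backend {
  public:
        virtual ~backend() = default;
        virtual void* allocate(std::size_t bytes) = 0;            // hypothetical
        virtual void copy_nd(const void* source, void* dest) = 0; // hypothetical nd-copy entry point
};

// ties the abstract interface to SYCL; shared SYCL plumbing would live here
class sycl_backend : public backend {};

// selected at construction time depending on the device platform
class sycl_generic_backend final : public sycl_backend {
  public:
        void* allocate(std::size_t) override { return nullptr; } // stub
        void copy_nd(const void*, void*) override {}             // stub: portable nd-copy
};

class sycl_cuda_backend final : public sycl_backend {
  public:
        void* allocate(std::size_t) override { return nullptr; } // stub
        void copy_nd(const void*, void*) override {}             // stub: CUDA-specialized nd-copy
};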

3305 of 3839 branches covered (86.09%); branch coverage is included in the aggregate %.

509 of 553 new or added lines in 15 files covered (92.04%).

2 existing lines in 2 files are now uncovered.

7753 of 8063 relevant lines covered (96.16%).

180915.51 hits per line.

Source file: /src/task.cc (92.06% covered)
#include "task.h"

#include "access_modes.h"
#include "utils.h"


namespace celerity {
namespace detail {

        std::unordered_set<buffer_id> buffer_access_map::get_accessed_buffers() const {
                std::unordered_set<buffer_id> result;
                for(const auto& [bid, _] : m_accesses) {
                        result.emplace(bid);
                }
                return result;
        }

        std::unordered_set<cl::sycl::access::mode> buffer_access_map::get_access_modes(buffer_id bid) const {
                std::unordered_set<cl::sycl::access::mode> result;
                for(const auto& [b, rm] : m_accesses) {
                        if(b == bid) { result.insert(rm->get_access_mode()); }
                }
                return result;
        }

        template <int KernelDims>
        subrange<3> apply_range_mapper(const range_mapper_base* rm, const chunk<KernelDims>& chnk) {
                switch(rm->get_buffer_dimensions()) {
                case 0: return subrange_cast<3>(subrange<0>());
                case 1: return subrange_cast<3>(rm->map_1(chnk));
                case 2: return subrange_cast<3>(rm->map_2(chnk));
                case 3: return rm->map_3(chnk);
                default: assert(false); return subrange<3>{};
                }
        }

        subrange<3> apply_range_mapper(const range_mapper_base* rm, const chunk<3>& chnk, int kernel_dims) {
                switch(kernel_dims) {
                case 0: return apply_range_mapper<0>(rm, chunk_cast<0>(chnk));
                case 1: return apply_range_mapper<1>(rm, chunk_cast<1>(chnk));
                case 2: return apply_range_mapper<2>(rm, chunk_cast<2>(chnk));
                case 3: return apply_range_mapper<3>(rm, chunk_cast<3>(chnk));
                default: assert(!"Unreachable"); return subrange<3>{};
                }
        }

        region<3> buffer_access_map::get_mode_requirements(
            const buffer_id bid, const access_mode mode, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const {
                box_vector<3> boxes;
                for(size_t i = 0; i < m_accesses.size(); ++i) {
                        if(m_accesses[i].first != bid || m_accesses[i].second->get_access_mode() != mode) continue;
                        boxes.push_back(get_requirements_for_nth_access(i, kernel_dims, sr, global_size));
                }
                return region(std::move(boxes));
        }

        box<3> buffer_access_map::get_requirements_for_nth_access(const size_t n, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const {
                return apply_range_mapper(m_accesses[n].second.get(), chunk<3>{sr.offset, sr.range, global_size}, kernel_dims);
        }

        box_vector<3> buffer_access_map::get_required_contiguous_boxes(
            const buffer_id bid, const int kernel_dims, const subrange<3>& sr, const range<3>& global_size) const {
                box_vector<3> boxes;
                for(const auto& [a_bid, a_rm] : m_accesses) {
                        if(a_bid == bid) {
                                const auto accessed_box = box(apply_range_mapper(a_rm.get(), chunk<3>{sr.offset, sr.range, global_size}, kernel_dims));
                                if(!accessed_box.empty()) { boxes.push_back(accessed_box); }
                        }
                }
                return boxes;
        }

        void side_effect_map::add_side_effect(const host_object_id hoid, const experimental::side_effect_order order) {
                // TODO for multiple side effects on the same hoid, find the weakest order satisfying all of them
                emplace(hoid, order);
        }

        std::string print_task_debug_label(const task& tsk, bool title_case) {
                return utils::make_task_debug_label(tsk.get_type(), tsk.get_id(), tsk.get_debug_name(), title_case);
        }

        std::unordered_map<buffer_id, region<3>> detect_overlapping_writes(const task& tsk, const box_vector<3>& chunks) {
                const box<3> scalar_reduction_box({0, 0, 0}, {1, 1, 1});

                auto& bam = tsk.get_buffer_access_map();

                // track the union of writes we have checked so far in order to detect an overlap between that union and the next write
                std::unordered_map<buffer_id, region<3>> buffer_write_accumulators;
                // collect overlapping writes in order to report all of them before throwing
                std::unordered_map<buffer_id, region<3>> overlapping_writes;

                for(const auto bid : bam.get_accessed_buffers()) {
                        for(const auto& ck : chunks) {
                                region<3> writes;
                                for(const auto mode : bam.get_access_modes(bid)) {
                                        if(access::mode_traits::is_producer(mode)) {
                                                const auto req = bam.get_mode_requirements(bid, mode, tsk.get_dimensions(), ck.get_subrange(), tsk.get_global_size());
                                                writes = region_union(writes, req);
                                        }
                                }
                                if(!writes.empty()) {
                                        auto& write_accumulator = buffer_write_accumulators[bid]; // allow default-insert
                                        if(const auto overlap = region_intersection(write_accumulator, writes); !overlap.empty()) {
                                                auto& full_overlap = overlapping_writes[bid]; // allow default-insert
                                                full_overlap = region_union(full_overlap, overlap);
                                        }
                                        write_accumulator = region_union(write_accumulator, writes);
                                }
                        }
                }

                // we already check for accessor-reduction overlaps on task generation, but we still repeat the sanity-check here
                for(const auto& rinfo : tsk.get_reductions()) {
                        auto& write_accumulator = buffer_write_accumulators[rinfo.bid]; // allow default-insert
                        if(const auto overlap = region_intersection(write_accumulator, scalar_reduction_box); !overlap.empty()) {
                                auto& full_overlap = overlapping_writes[rinfo.bid]; // allow default-insert
                                full_overlap = region_union(full_overlap, overlap);
                        }
                        write_accumulator = region_union(write_accumulator, scalar_reduction_box);
                }

                return overlapping_writes;
        }

} // namespace detail
} // namespace celerity