• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

celerity / celerity-runtime / 12009901531

25 Nov 2024 12:20PM UTC coverage: 94.92% (+0.009%) from 94.911%
12009901531

push

github

fknorr
Add missing includes and consistently order them

We can't add the misc-include-cleaner lint because it causes too many
false positives with "interface headers" such as sycl.hpp.

3190 of 3626 branches covered (87.98%)

Branch coverage included in aggregate %.

7049 of 7161 relevant lines covered (98.44%)

1242183.17 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.97
/src/split.cc
1
#include "split.h"
2

3
#include "grid.h"
4
#include "ranges.h"
5

6
#include <algorithm>
7
#include <array>
8
#include <cassert>
9
#include <cmath>
10
#include <cstddef>
11
#include <tuple>
12
#include <vector>
13

14

15
namespace {
16

17
using namespace celerity;
18
using namespace celerity::detail;
19

20
[[maybe_unused]] void sanity_check_split(const chunk<3>& full_chunk, const std::vector<chunk<3>>& split) {
8,960✔
21
        region<3> reconstructed_chunk;
8,960✔
22
        for(auto& chnk : split) {
21,452✔
23
                assert(region_intersection(reconstructed_chunk, box<3>(chnk)).empty());
12,492✔
24
                reconstructed_chunk = region_union(box<3>(chnk), reconstructed_chunk);
12,492✔
25
        }
26
        assert(region_difference(reconstructed_chunk, box<3>(full_chunk)).empty());
8,960✔
27
}
17,920✔
28

29
template <int Dims>
30
std::tuple<range<Dims>, range<Dims>, range<Dims>> compute_small_and_large_chunks(
8,960✔
31
    const chunk<3>& full_chunk, const range<3>& granularity, const std::array<size_t, Dims>& actual_num_chunks) {
32
        range<Dims> small_chunk_size{zeros};
8,960✔
33
        range<Dims> large_chunk_size{zeros};
8,960✔
34
        range<Dims> num_large_chunks{zeros};
8,960✔
35
        for(int d = 0; d < Dims; ++d) {
18,271✔
36
                const size_t ideal_chunk_size = full_chunk.range[d] / actual_num_chunks[d];
9,311✔
37
                small_chunk_size[d] = (ideal_chunk_size / granularity[d]) * granularity[d];
9,311✔
38
                large_chunk_size[d] = small_chunk_size[d] + granularity[d];
9,311✔
39
                num_large_chunks[d] = (full_chunk.range[d] - small_chunk_size[d] * actual_num_chunks[d]) / granularity[d];
9,311✔
40
        }
41
        return {small_chunk_size, large_chunk_size, num_large_chunks};
17,920✔
42
}
43

44
/**
45
 * Given a factorization of `num_chunks` (i.e., `f0 * f1 = num_chunks`), try to find the assignment of factors to
46
 * dimensions that produces more chunks under the given constraints. If they are tied, try to find the assignment
47
 * that results in a "nicer" split according to some heuristics (see below).
48
 *
49
 * The single argument `factor` specifies both factors, as `f0 = factor` and `f1 = num_chunks / factor`.
50
 *
51
 * @returns The number of chunks that can be created in dimension 0 and dimension 1, respectively. These are at most
52
 *          (f0, f1) or (f1, f0), however may be less if constrained by the split granularity.
53
 */
54
std::array<size_t, 2> assign_split_factors_2d(const chunk<3>& full_chunk, const range<3>& granularity, const size_t factor, const size_t num_chunks) {
356✔
55
        assert(num_chunks % factor == 0);
356✔
56
        const size_t max_chunks[2] = {full_chunk.range[0] / granularity[0], full_chunk.range[1] / granularity[1]};
356✔
57
        const size_t f0 = factor;
356✔
58
        const size_t f1 = num_chunks / factor;
356✔
59

60
        // Decide in which direction to split by first checking which
61
        // factor assignment produces more chunks under the given constraints.
62
        const std::array<size_t, 2> split_0_1 = {std::min(f0, max_chunks[0]), std::min(f1, max_chunks[1])};
356✔
63
        const std::array<size_t, 2> split_1_0 = {std::min(f1, max_chunks[0]), std::min(f0, max_chunks[1])};
356✔
64
        const auto count0 = split_0_1[0] * split_0_1[1];
356✔
65
        const auto count1 = split_1_0[0] * split_1_0[1];
356✔
66

67
        if(count0 > count1) { return split_0_1; }
356✔
68
        if(count0 < count1) { return split_1_0; }
355✔
69

70
        // If we're tied for the number of chunks we can create, try some heuristics to decide.
71

72
        // If domain is square(-ish), prefer splitting along slower dimension.
73
        // (These bounds have been chosen arbitrarily!)
74
        const double squareishness = std::sqrt(full_chunk.range.size()) / static_cast<double>(full_chunk.range[0]);
347✔
75
        if(squareishness > 0.95 && squareishness < 1.05) { return (f0 >= f1) ? split_0_1 : split_1_0; }
347✔
76

77
        // For non-square domains, prefer split that produces shorter edges (compare sum of circumferences)
78
        const auto circ0 = full_chunk.range[0] / split_0_1[0] + full_chunk.range[1] / split_0_1[1];
27✔
79
        const auto circ1 = full_chunk.range[0] / split_1_0[0] + full_chunk.range[1] / split_1_0[1];
27✔
80
        return circ0 < circ1 ? split_0_1 : split_1_0;
27✔
81

82
        // TODO: Yet another heuristic we may want to consider is how even chunk sizes are,
83
        // i.e., how balanced the workload is.
84
}
85

86
} // namespace
87

88
namespace celerity::detail {
89

90
std::vector<chunk<3>> split_1d(const chunk<3>& full_chunk, const range<3>& granularity, const size_t num_chunks) {
#ifndef NDEBUG
        assert(num_chunks > 0);
        for(int d = 0; d < 3; ++d) {
                assert(granularity[d] > 0);
                assert(full_chunk.range[d] % granularity[d] == 0);
        }
#endif

        // Due to split granularity requirements or if num_workers > global_size[0],
        // we may not be able to create the requested number of chunks.
        const std::array<size_t, 1> actual_num_chunks = {std::min(num_chunks, full_chunk.range[0] / granularity[0])};
        const auto [small_chunk_size, large_chunk_size, num_large_chunks] = compute_small_and_large_chunks<1>(full_chunk, granularity, actual_num_chunks);

        // Start from copies of the full chunk and rewrite range/offset in dimension 0: the first
        // `num_large_chunks[0]` chunks get the large extent, all remaining ones the small extent.
        std::vector<chunk<3>> result(actual_num_chunks[0], {full_chunk.offset, full_chunk.range, full_chunk.global_size});
        size_t current_offset = full_chunk.offset[0];
        for(size_t i = 0; i < actual_num_chunks[0]; ++i) {
                const size_t extent = (i < num_large_chunks[0]) ? large_chunk_size[0] : small_chunk_size[0];
                result[i].range[0] = extent;
                result[i].offset[0] = current_offset;
                current_offset += extent;
        }

#ifndef NDEBUG
        sanity_check_split(full_chunk, result);
#endif

        return result;
}
×
120

121
// TODO: Make the split dimensions configurable for 3D chunks?
122
std::vector<chunk<3>> split_2d(const chunk<3>& full_chunk, const range<3>& granularity, const size_t num_chunks) {
351✔
123
#ifndef NDEBUG
124
        assert(num_chunks > 0);
351✔
125
        for(int d = 0; d < 3; ++d) {
1,404✔
126
                assert(granularity[d] > 0);
1,053✔
127
                assert(full_chunk.range[d] % granularity[d] == 0);
1,053✔
128
        }
129
#endif
130

131
        // Factorize num_chunks
132
        // We start out with an initial guess of `factor = floor(sqrt(num_chunks))` (the other one is implicitly given by `num_chunks / factor`),
133
        // and work our way down, keeping track of the best factorization we've found so far, until we find a factorization that produces
134
        // the requested number of chunks, or until we reach (1, num_chunks), i.e., a 1D split.
135
        size_t factor = std::floor(std::sqrt(num_chunks));
351✔
136
        std::array<size_t, 2> best_chunk_counts = {0, 0};
351✔
137
        while(factor >= 1) {
357✔
138
                while(factor > 1 && num_chunks % factor != 0) {
356!
139
                        factor--;
×
140
                }
141
                // The returned counts are at most (factor, num_chunks / factor), however may be less if constrained by the split granularity.
142
                const auto chunk_counts = assign_split_factors_2d(full_chunk, granularity, factor, num_chunks);
356✔
143
                if(chunk_counts[0] * chunk_counts[1] > best_chunk_counts[0] * best_chunk_counts[1]) { best_chunk_counts = chunk_counts; }
356!
144
                if(chunk_counts[0] * chunk_counts[1] == num_chunks) { break; }
356✔
145
                factor--;
6✔
146
        }
147
        const auto actual_num_chunks = best_chunk_counts;
351✔
148
        const auto [small_chunk_size, large_chunk_size, num_large_chunks] = compute_small_and_large_chunks<2>(full_chunk, granularity, actual_num_chunks);
351✔
149

150
        std::vector<chunk<3>> result(actual_num_chunks[0] * actual_num_chunks[1], {full_chunk.offset, full_chunk.range, full_chunk.global_size});
1,053✔
151
        id<3> offset = full_chunk.offset;
351✔
152

153
        for(size_t j = 0; j < actual_num_chunks[0]; ++j) {
905✔
154
                range<2> chunk_size = {(j < num_large_chunks[0]) ? large_chunk_size[0] : small_chunk_size[0], 0};
554✔
155
                for(size_t i = 0; i < actual_num_chunks[1]; ++i) {
1,471✔
156
                        chunk_size[1] = (i < num_large_chunks[1]) ? large_chunk_size[1] : small_chunk_size[1];
917✔
157
                        auto& chnk = result[j * actual_num_chunks[1] + i];
917✔
158
                        chnk.offset = offset;
917✔
159
                        chnk.range[0] = chunk_size[0];
917✔
160
                        chnk.range[1] = chunk_size[1];
917✔
161
                        offset[1] += chunk_size[1];
917✔
162
                }
163
                offset[0] += chunk_size[0];
554✔
164
                offset[1] = full_chunk.offset[1];
554✔
165
        }
166

167
#ifndef NDEBUG
168
        sanity_check_split(full_chunk, result);
351✔
169
#endif
170

171
        return result;
702✔
172
}
×
173
} // namespace celerity::detail
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc