• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / jorgen.edelbo_333

01 Jul 2024 07:21AM UTC coverage: 90.865% (-0.08%) from 90.948%
jorgen.edelbo_333

Pull #7826

Evergreen

jedelbo
Merge tag 'v14.10.2' into next-major
Pull Request #7826: Merge Next major

102912 of 181138 branches covered (56.81%)

3131 of 3738 new or added lines in 54 files covered. (83.76%)

80 existing lines in 14 files now uncovered.

217498 of 239364 relevant lines covered (90.86%)

6655796.15 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.67
/src/realm/array_direct.hpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#ifndef REALM_ARRAY_DIRECT_HPP
20
#define REALM_ARRAY_DIRECT_HPP
21

22
#include <cstring>
23
#include <realm/utilities.hpp>
24
#include <realm/alloc.hpp>
25

26
// clang-format off
27
/* wid == 16/32 likely when accessing offsets in B tree */
28
#define REALM_TEMPEX(fun, wid, arg) \
2,146,043,046✔
29
if (wid == 16) {fun<16> arg;} \
2,146,043,046✔
30
else if (wid == 32) {fun<32> arg;} \
2,146,043,046✔
31
else if (wid == 0) {fun<0> arg;} \
1,764,149,961✔
32
else if (wid == 1) {fun<1> arg;} \
816,934,749✔
33
else if (wid == 2) {fun<2> arg;} \
750,500,469✔
34
else if (wid == 4) {fun<4> arg;} \
564,261,840✔
35
else if (wid == 8) {fun<8> arg;} \
405,275,124✔
36
else if (wid == 64) {fun<64> arg;} \
405,036,390✔
37
else {REALM_ASSERT_DEBUG(false); fun<0> arg;}
4,294,967,294✔
38

39
#define REALM_TEMPEX2(fun, targ, wid, arg) \
11,932,950✔
40
if (wid == 16) {fun<targ, 16> arg;} \
11,932,950✔
41
else if (wid == 32) {fun<targ, 32> arg;} \
11,932,950✔
42
else if (wid == 0) {fun<targ, 0> arg;} \
10,773,102✔
43
else if (wid == 1) {fun<targ, 1> arg;} \
4,111,053✔
44
else if (wid == 2) {fun<targ, 2> arg;} \
3,871,077✔
45
else if (wid == 4) {fun<targ, 4> arg;} \
3,121,812✔
46
else if (wid == 8) {fun<targ, 8> arg;} \
830,187✔
47
else if (wid == 64) {fun<targ, 64> arg;} \
635,355✔
48
else {REALM_ASSERT_DEBUG(false); fun<targ, 0> arg;}
4,294,967,294✔
49

50
#define REALM_TEMPEX3(fun, targ1, wid, targ3, arg) \
51
if (wid == 16) {fun<targ1, 16, targ3> arg;} \
52
else if (wid == 32) {fun<targ1, 32, targ3> arg;} \
53
else if (wid == 0) {fun<targ1, 0, targ3> arg;} \
54
else if (wid == 1) {fun<targ1, 1, targ3> arg;} \
55
else if (wid == 2) {fun<targ1, 2, targ3> arg;} \
56
else if (wid == 4) {fun<targ1, 4, targ3> arg;} \
57
else if (wid == 8) {fun<targ1, 8, targ3> arg;} \
58
else if (wid == 64) {fun<targ1, 64, targ3> arg;} \
59
else {REALM_ASSERT_DEBUG(false); fun<targ1, 0, targ3> arg;}
60

61
#define REALM_TEMPEX4(fun, targ1, targ3, targ4, wid, arg) \
62
if (wid == 16) {fun<targ1, targ3, targ4, 16> arg;} \
63
else if (wid == 32) {fun<targ1, targ3, targ4, 32> arg;} \
64
else if (wid == 0) {fun<targ1, targ3, targ4, 0> arg;} \
65
else if (wid == 1) {fun<targ1, targ3, targ4, 1> arg;} \
66
else if (wid == 2) {fun<targ1, targ3, targ4, 2> arg;} \
67
else if (wid == 4) {fun<targ1, targ3, targ4, 4> arg;} \
68
else if (wid == 8) {fun<targ1, targ3, targ4, 8> arg;} \
69
else if (wid == 64) {fun<targ1, targ3, targ4, 64> arg;} \
70
else {REALM_ASSERT_DEBUG(false); fun<targ1, targ3, targ4, 0> arg;}
71
// clang-format on
72

73
namespace realm {
74

75
// Direct access methods
76

77
template <size_t width>
78
void set_direct(char* data, size_t ndx, int_fast64_t value) noexcept
79
{
1,312,852,482✔
80
    if (width == 0) {
1,312,852,482✔
81
        REALM_ASSERT_DEBUG(value == 0);
31,156,881!
82
        return;
31,156,881✔
83
    }
31,156,881✔
84
    else if (width == 1) {
1,281,695,601✔
85
        REALM_ASSERT_DEBUG(0 <= value && value <= 0x01);
164,001,255!
86
        size_t byte_ndx = ndx / 8;
164,001,255✔
87
        size_t bit_ndx = ndx % 8;
164,001,255✔
88
        typedef unsigned char uchar;
164,001,255✔
89
        uchar* p = reinterpret_cast<uchar*>(data) + byte_ndx;
164,001,255✔
90
        *p = uchar((*p & ~(0x01 << bit_ndx)) | (int(value) & 0x01) << bit_ndx);
164,001,255✔
91
    }
164,001,255✔
92
    else if (width == 2) {
1,117,694,346✔
93
        REALM_ASSERT_DEBUG(0 <= value && value <= 0x03);
143,919,099!
94
        size_t byte_ndx = ndx / 4;
143,919,099✔
95
        size_t bit_ndx = ndx % 4 * 2;
143,919,099✔
96
        typedef unsigned char uchar;
143,919,099✔
97
        uchar* p = reinterpret_cast<uchar*>(data) + byte_ndx;
143,919,099✔
98
        *p = uchar((*p & ~(0x03 << bit_ndx)) | (int(value) & 0x03) << bit_ndx);
143,919,099✔
99
    }
143,919,099✔
100
    else if (width == 4) {
973,775,247✔
101
        REALM_ASSERT_DEBUG(0 <= value && value <= 0x0F);
10,416,735!
102
        size_t byte_ndx = ndx / 2;
10,416,735✔
103
        size_t bit_ndx = ndx % 2 * 4;
10,416,735✔
104
        typedef unsigned char uchar;
10,416,735✔
105
        uchar* p = reinterpret_cast<uchar*>(data) + byte_ndx;
10,416,735✔
106
        *p = uchar((*p & ~(0x0F << bit_ndx)) | (int(value) & 0x0F) << bit_ndx);
10,416,735✔
107
    }
10,416,735✔
108
    else if (width == 8) {
963,358,512✔
109
        REALM_ASSERT_DEBUG(std::numeric_limits<int8_t>::min() <= value &&
146,966,991!
110
                           value <= std::numeric_limits<int8_t>::max());
146,966,991✔
111
        *(reinterpret_cast<int8_t*>(data) + ndx) = int8_t(value);
146,966,991✔
112
    }
146,966,991✔
113
    else if (width == 16) {
816,391,521✔
114
        REALM_ASSERT_DEBUG(std::numeric_limits<int16_t>::min() <= value &&
284,931,762!
115
                           value <= std::numeric_limits<int16_t>::max());
284,931,762✔
116
        *(reinterpret_cast<int16_t*>(data) + ndx) = int16_t(value);
284,931,762✔
117
    }
284,931,762✔
118
    else if (width == 32) {
531,459,759✔
119
        REALM_ASSERT_DEBUG(std::numeric_limits<int32_t>::min() <= value &&
387,506,427!
120
                           value <= std::numeric_limits<int32_t>::max());
387,506,427✔
121
        *(reinterpret_cast<int32_t*>(data) + ndx) = int32_t(value);
387,506,427✔
122
    }
387,506,427✔
123
    else if (width == 64) {
144,369,432✔
124
        REALM_ASSERT_DEBUG(std::numeric_limits<int64_t>::min() <= value &&
144,344,226!
125
                           value <= std::numeric_limits<int64_t>::max());
144,344,226✔
126
        *(reinterpret_cast<int64_t*>(data) + ndx) = int64_t(value);
144,344,226✔
127
    }
144,344,226✔
128
    else {
2,147,508,853✔
129
        REALM_ASSERT_DEBUG(false);
2,147,508,853✔
130
    }
2,147,508,853✔
131
}
1,312,852,482✔
132

133
inline void set_direct(char* data, size_t width, size_t ndx, int_fast64_t value) noexcept
134
{
×
135
    REALM_TEMPEX(set_direct, width, (data, ndx, value));
×
136
}
×
137

138
template <size_t width>
139
void fill_direct(char* data, size_t begin, size_t end, int_fast64_t value) noexcept
140
{
187,524✔
141
    for (size_t i = begin; i != end; ++i)
200,928!
142
        set_direct<width>(data, i, value);
13,404✔
143
}
187,524✔
144

145
template <int w>
146
int64_t get_direct(const char* data, size_t ndx) noexcept
147
{
568,579,980✔
148
    if (w == 0) {
568,579,980✔
149
        return 0;
2,299,488✔
150
    }
2,299,488✔
151
    if (w == 1) {
566,280,492✔
152
        size_t offset = ndx >> 3;
3,357,462✔
153
        return (data[offset] >> (ndx & 7)) & 0x01;
3,357,462✔
154
    }
3,357,462✔
155
    if (w == 2) {
562,923,030✔
156
        size_t offset = ndx >> 2;
6,724,731✔
157
        return (data[offset] >> ((ndx & 3) << 1)) & 0x03;
6,724,731✔
158
    }
6,724,731✔
159
    if (w == 4) {
556,198,299✔
160
        size_t offset = ndx >> 1;
1,156,443✔
161
        return (data[offset] >> ((ndx & 1) << 2)) & 0x0F;
1,156,443✔
162
    }
1,156,443✔
163
    if (w == 8) {
555,041,856✔
164
        return *reinterpret_cast<const signed char*>(data + ndx);
25,726,686✔
165
    }
25,726,686✔
166
    if (w == 16) {
529,315,170✔
167
        size_t offset = ndx << 1;
44,654,103✔
168
        return *reinterpret_cast<const int16_t*>(data + offset);
44,654,103✔
169
    }
44,654,103✔
170
    if (w == 32) {
484,661,067✔
171
        size_t offset = ndx << 2;
349,376,274✔
172
        return *reinterpret_cast<const int32_t*>(data + offset);
349,376,274✔
173
    }
349,376,274✔
174
    if (w == 64) {
135,337,239✔
175
        size_t offset = ndx << 3;
135,282,207✔
176
        return *reinterpret_cast<const int64_t*>(data + offset);
135,282,207✔
177
    }
135,282,207✔
178
    REALM_ASSERT_DEBUG(false);
2,147,538,679✔
179
    return int64_t(-1);
2,147,538,679✔
180
}
135,284,793✔
181

182
inline int64_t get_direct(const char* data, size_t width, size_t ndx) noexcept
183
{
463,982,715✔
184
    REALM_TEMPEX(return get_direct, width, (data, ndx));
463,982,715✔
185
}
×
186

187
// An iterator for getting a 64 bit word from any (byte-address+bit-offset) address.
188
class UnalignedWordIter {
189
public:
190
    UnalignedWordIter(const uint64_t* data, size_t bit_offset)
191
        : m_word_ptr(data + (bit_offset >> 6))
5,243,409✔
192
        , m_in_word_offset(bit_offset & 0x3F)
5,243,409✔
193
    {
10,673,526✔
194
    }
10,673,526✔
195
    // 'num_bits' number of bits which must be read
196
    // WARNING returned word may be garbage above the first 'num_bits' bits.
197
    uint64_t consume(size_t num_bits)
198
    {
12,300,300✔
199
        auto first_word = m_word_ptr[0];
12,300,300✔
200
        uint64_t result = first_word >> m_in_word_offset;
12,300,300✔
201
        // note: above shifts in zeroes
202
        if (m_in_word_offset + num_bits > 64) {
12,300,300✔
203
            // if we're here, in_word_offset > 0
204
            auto first_word_size = 64 - m_in_word_offset;
8,373,519✔
205
            auto second_word = m_word_ptr[1];
8,373,519✔
206
            result |= second_word << first_word_size;
8,373,519✔
207
            // note: above shifts in zeroes below the bits we want
208
        }
8,373,519✔
209
        _bump(num_bits);
12,300,300✔
210
        return result;
12,300,300✔
211
    }
12,300,300✔
212
    uint64_t consume_with_unsafe_prefetch(size_t num_bits)
213
    {
82,227,501✔
214
        auto first_word = m_word_ptr[0];
82,227,501✔
215
        uint64_t result = first_word >> m_in_word_offset;
82,227,501✔
216
        // note: above shifts in zeroes
217
        auto first_word_size = 64 - m_in_word_offset;
82,227,501✔
218
        auto second_word = m_word_ptr[1];
82,227,501✔
219
        REALM_ASSERT_DEBUG(num_bits <= 64);
82,227,501✔
220
        if (num_bits > first_word_size)
82,227,501✔
221
            result |= second_word << first_word_size;
96,643,017✔
222
        // note: above shifts in zeroes below the bits we want
223
        _bump(num_bits);
82,227,501✔
224
        return result;
82,227,501✔
225
    }
82,227,501✔
226

227
private:
228
    const uint64_t* m_word_ptr;
229
    unsigned m_in_word_offset;
230

231
    // bump the iterator the specified number of bits
232
    void _bump(size_t num_bits)
233
    {
83,842,029✔
234
        auto total_offset = m_in_word_offset + num_bits;
83,842,029✔
235
        m_word_ptr += total_offset >> 6;
83,842,029✔
236
        m_in_word_offset = total_offset & 0x3F;
83,842,029✔
237
    }
83,842,029✔
238
};
239

240
// Read a bit field of up to 64 bits.
241
// - Any alignment and size is supported
242
// - The start of the 'data' area must be 64 bit aligned in all cases.
243
// - For fields of 64-bit or less, the first 64-bit word is filled with the zero-extended
244
//   value of the bitfield.
245
// iterator useful for scanning arrays faster than by indexing each element
246
// supports arrays of pairs by differentiating field size and step size.
247
class BfIterator {
248
    friend class FlexCompressor;
249
    friend class PackedCompressor;
250

251
public:
252
    BfIterator() = default;
253
    BfIterator(const BfIterator&) = default;
254
    BfIterator(BfIterator&&) = default;
255
    BfIterator& operator=(const BfIterator&) = default;
256
    BfIterator& operator=(BfIterator&&) = default;
257
    BfIterator(uint64_t* data_area, size_t initial_offset, uint8_t field_size, uint8_t step_size, size_t index)
258
        : data_area(data_area)
39,270,216✔
259
        , field_size(field_size)
39,270,216✔
260
        , step_size(step_size)
39,270,216✔
261
        , offset(initial_offset)
39,270,216✔
262
    {
80,736,402✔
263
        if (field_size < 64)
80,736,402✔
264
            mask = (1ULL << field_size) - 1;
70,796,988✔
265
        move(index);
80,736,402✔
266
    }
80,736,402✔
267

268
    inline uint64_t get_full_word_with_value() const
269
    {
108,509,391✔
270
        const auto in_word_position = field_position & 0x3F;
108,509,391✔
271
        const auto first_word = first_word_ptr[0];
108,509,391✔
272
        uint64_t result = first_word >> in_word_position;
108,509,391✔
273
        // note: above shifts in zeroes above the bitfield
274
        if (in_word_position + field_size > 64) {
108,509,391✔
275
            // if we're here, in_word_position > 0
276
            const auto first_word_size = 64 - in_word_position;
21,445,830✔
277
            const auto second_word = first_word_ptr[1];
21,445,830✔
278
            return result | second_word << first_word_size;
21,445,830✔
279
            // note: above shifts in zeroes below the bits we want
280
        }
21,445,830✔
281
        return result;
87,063,561✔
282
    }
108,509,391✔
283

284
    inline uint64_t get_value() const
285
    {
108,481,731✔
286
        auto result = get_full_word_with_value();
108,481,731✔
287
        // discard any bits above the field we want
288
        if (field_size < 64)
108,481,731✔
289
            result &= mask;
94,061,853✔
290
        return result;
108,481,731✔
291
    }
108,481,731✔
292

293
    // get unaligned word - this should not be called if the next word extends beyond
294
    // end of array. For that particular case, you must use get_last_unaligned_word instead.
295
    inline uint64_t get_unaligned_word() const
NEW
296
    {
×
NEW
297
        const auto in_word_position = field_position & 0x3F;
×
NEW
298
        const auto first_word = first_word_ptr[0];
×
NEW
299
        if (in_word_position == 0)
×
NEW
300
            return first_word;
×
NEW
301
        uint64_t result = first_word >> in_word_position;
×
NEW
302
        // note: above shifts in zeroes above the bitfield
×
NEW
303
        const auto first_word_size = 64 - in_word_position;
×
NEW
304
        const auto second_word = first_word_ptr[1];
×
NEW
305
        result |= second_word << first_word_size;
×
NEW
306
        // note: above shifts in zeroes below the bits we want
×
NEW
307
        return result;
×
NEW
308
    }
×
309

310
    inline uint64_t get_last_unaligned_word() const
NEW
311
    {
×
NEW
312
        const auto in_word_position = field_position & 0x3F;
×
NEW
313
        const auto first_word = first_word_ptr[0];
×
NEW
314
        const uint64_t result = first_word >> in_word_position;
×
NEW
315
        // note: above shifts in zeroes above the bitfield
×
NEW
316
        return result;
×
NEW
317
    }
×
318

319
    void set_value(uint64_t value) const
320
    {
18,131,244✔
321
        const auto in_word_position = field_position & 0x3F;
18,131,244✔
322
        auto first_word = first_word_ptr[0];
18,131,244✔
323
        uint64_t mask = 0ULL - 1ULL;
18,131,244✔
324
        if (field_size < 64) {
18,131,244✔
325
            mask = (1ULL << field_size) - 1;
16,835,544✔
326
            value &= mask;
16,835,544✔
327
        }
16,835,544✔
328
        // zero out field in first word:
329
        const auto first_word_mask = ~(mask << in_word_position);
18,131,244✔
330
        first_word &= first_word_mask;
18,131,244✔
331
        // or in relevant part of value
332
        first_word |= value << in_word_position;
18,131,244✔
333
        first_word_ptr[0] = first_word;
18,131,244✔
334
        if (in_word_position + field_size > 64) {
18,131,244✔
335
            // bitfield crosses word boundary.
336
            // discard the lowest bits of value (it has been written to the first word)
337
            const auto bits_written_to_first_word = 64 - in_word_position;
3,678,930✔
338
            // bit_written_to_first_word must be lower than 64, so shifts based on it are well defined
339
            value >>= bits_written_to_first_word;
3,678,930✔
340
            const auto second_word_mask = mask >> bits_written_to_first_word;
3,678,930✔
341
            auto second_word = first_word_ptr[1];
3,678,930✔
342
            // zero out the field in second word, then or in the (high part of) value
343
            second_word &= ~second_word_mask;
3,678,930✔
344
            second_word |= value;
3,678,930✔
345
            first_word_ptr[1] = second_word;
3,678,930✔
346
        }
3,678,930✔
347
    }
18,131,244✔
348
    inline void operator++()
349
    {
18,140,448✔
350
        const auto next_field_position = field_position + step_size;
18,140,448✔
351
        if ((next_field_position >> 6) > (field_position >> 6)) {
18,140,448✔
352
            first_word_ptr = data_area + (next_field_position >> 6);
5,511,018✔
353
        }
5,511,018✔
354
        field_position = next_field_position;
18,140,448✔
355
    }
18,140,448✔
356

357
    inline void move(size_t index)
358
    {
90,498,003✔
359
        field_position = offset + index * step_size;
90,498,003✔
360
        first_word_ptr = data_area + (field_position >> 6);
90,498,003✔
361
    }
90,498,003✔
362

363
    inline uint64_t operator*() const
364
    {
90,444,591✔
365
        return get_value();
90,444,591✔
366
    }
90,444,591✔
367

368
private:
369
    friend bool operator<(const BfIterator&, const BfIterator&);
370
    uint64_t* data_area = nullptr;
371
    uint64_t* first_word_ptr = nullptr;
372
    size_t field_position = 0;
373
    uint8_t field_size = 0;
374
    uint8_t step_size = 0; // may be different than field_size if used for arrays of pairs
375
    size_t offset = 0;
376
    uint64_t mask = 0;
377
};
378

379

380
inline bool operator<(const BfIterator& a, const BfIterator& b)
NEW
381
{
×
NEW
382
    REALM_ASSERT(a.data_area == b.data_area);
×
NEW
383
    return a.field_position < b.field_position;
×
NEW
384
}
×
385

386
inline uint64_t read_bitfield(uint64_t* data_area, size_t field_position, uint8_t width)
387
{
7,827,621✔
388
    BfIterator it(data_area, field_position, width, width, 0);
7,827,621✔
389
    return *it;
7,827,621✔
390
}
7,827,621✔
391

392
inline void write_bitfield(uint64_t* data_area, size_t field_position, uint8_t width, uint64_t value)
NEW
393
{
×
NEW
394
    BfIterator it(data_area, field_position, width, width, 0);
×
NEW
395
    it.set_value(value);
×
NEW
396
}
×
397

398
inline int64_t sign_extend_field_by_mask(uint64_t sign_mask, uint64_t value)
399
{
78,560,619✔
400
    uint64_t sign_extension = 0ULL - (value & sign_mask);
78,560,619✔
401
    return value | sign_extension;
78,560,619✔
402
}
78,560,619✔
403

404
inline int64_t sign_extend_value(size_t width, uint64_t value)
405
{
22,134,075✔
406
    uint64_t sign_mask = 1ULL << (width - 1);
22,134,075✔
407
    uint64_t sign_extension = 0ULL - (value & sign_mask);
22,134,075✔
408
    return value | sign_extension;
22,134,075✔
409
}
22,134,075✔
410

411
template <int width>
412
inline std::pair<int64_t, int64_t> get_two(const char* data, size_t ndx) noexcept
UNCOV
413
{
×
UNCOV
414
    return std::make_pair(to_size_t(get_direct<width>(data, ndx + 0)), to_size_t(get_direct<width>(data, ndx + 1)));
×
UNCOV
415
}
×
416

417
inline std::pair<int64_t, int64_t> get_two(const char* data, size_t width, size_t ndx) noexcept
UNCOV
418
{
×
UNCOV
419
    REALM_TEMPEX(return get_two, width, (data, ndx));
×
420
}
×
421

422
/* Subword parallel search
423

424
 The following provides facilities for subword parallel search for bitfields of any size.
425
 To simplify, the first bitfield must be aligned within the word: it must occupy the lowest
426
 bits of the word.
427

428
 In general the metods here return a vector with the most significant bit in each field
429
 marking that a condition was met when comparing the corresponding pair of fields in two
430
 vectors. Checking if any field meets a condition is as simple as comparing the return
431
 vector against 0. Finding the first to meet a condition is also supported.
432

433
 Vectors are "split" into fields according to a MSB vector, wich indicates the most
434
 significant bit of each field. The MSB must be passed in as an argument to most
435
 bit field comparison functions. It can be generated by the field_sign_bit<width> template.
436

437
 The simplest condition to test is any_field_NE(A,B), where A and B are words.
438
 This condition should be true if any bitfield in A is not equal to the corresponding
439
 field in B.
440

441
 This is almost as simple as a direct word compare, but needs to take into account that
442
 we may want to have part of the words undefined.
443
 */
444
constexpr uint8_t num_fields_table[65] = {0, 64, 32, 21, 16, 12, 10, 9, // 0-7
445
                                          8, 7,  6,  5,  5,  4,  4,  4, // 8-15
446
                                          4, 3,  3,  3,  3,  3,  2,  2, // 16-23
447
                                          2, 2,  2,  2,  2,  2,  2,  2, // 24-31
448
                                          2, 1,  1,  1,  1,  1,  1,  1, // 32-39
449
                                          1, 1,  1,  1,  1,  1,  1,  1, // 40-47
450
                                          1, 1,  1,  1,  1,  1,  1,  1, // 48-55
451
                                          1, 1,  1,  1,  1,  1,  1,  1, // 56-63
452
                                          1};
453

454
constexpr uint8_t num_bits_table[65] = {64, 64, 64, 63, 64, 60, 60, 63, // 0-7
455
                                        64, 63, 60, 55, 60, 52, 56, 60, // 8-15
456
                                        64, 51, 54, 57, 60, 63, 44, 46, // 16-23
457
                                        48, 50, 52, 54, 56, 58, 60, 62, // 24-31
458
                                        64, 33, 34, 35, 36, 37, 38, 39, // 32-39
459
                                        40, 41, 42, 43, 44, 45, 46, 47, // 40-47
460
                                        48, 49, 50, 51, 52, 53, 54, 55, // 48-55
461
                                        56, 57, 58, 59, 60, 61, 62, 63, // 56-63
462
                                        64};
463

464
inline uint8_t num_fields_for_width(uint8_t width)
465
{
10,594,305✔
466
    REALM_ASSERT_DEBUG(width);
10,594,305✔
467
    const auto retval = num_fields_table[width];
10,594,305✔
468
#ifdef REALM_DEBUG
10,594,305✔
469
    REALM_ASSERT_DEBUG(width == 0 || retval == int(64 / width));
10,594,305✔
470
#endif
10,594,305✔
471
    return retval;
10,594,305✔
472
}
10,594,305✔
473

474
inline uint8_t num_bits_for_width(uint8_t width)
475
{
10,944,843✔
476
    REALM_ASSERT_DEBUG(width);
10,944,843✔
477
    return num_bits_table[width];
10,944,843✔
478
}
10,944,843✔
479

480
inline uint64_t cares_about(uint8_t width)
481
{
130✔
482
    return 0xFFFFFFFFFFFFFFFFULL >> (64 - num_bits_table[width]);
130✔
483
}
130✔
484

485
// true if any field in A differs from corresponding field in B. If you also want
486
// to find which fields, use find_all_fields_NE instead.
487
bool inline any_field_NE(int width, uint64_t A, uint64_t B)
NEW
488
{
×
NEW
489
    return (A ^ B) & cares_about(width);
×
NEW
490
}
×
491

492
// Populate all fields in a vector with a given value of a give width.
493
// Bits outside of the given field are ignored.
494
constexpr uint64_t populate(size_t width, uint64_t value)
495
{
6,835,497✔
496
    value &= 0xFFFFFFFFFFFFFFFFULL >> (64 - width);
6,835,497✔
497
    if (width < 8) {
6,835,497✔
498
        value |= value << width;
12,084✔
499
        width <<= 1;
12,084✔
500
        value |= value << width;
12,084✔
501
        width <<= 1;
12,084✔
502
        value |= value << width;
12,084✔
503
        width <<= 1;
12,084✔
504
    }
12,084✔
505
    // width now in range 8..64
506
    if (width < 32) {
7,372,920✔
507
        value |= value << width;
7,370,187✔
508
        width <<= 1;
7,370,187✔
509
        value |= value << width;
7,370,187✔
510
        width <<= 1;
7,370,187✔
511
    }
7,370,187✔
512
    // width now in range 32..128
513
    if (width < 64) {
7,498,440✔
514
        value |= value << width;
7,498,440✔
515
    }
7,498,440✔
516
    return value;
6,835,497✔
517
}
6,835,497✔
518

519
// provides a set bit in pos 0 of each field, remaining bits zero
520
constexpr uint64_t field_bit0(int width)
NEW
521
{
×
NEW
522
    return populate(width, 1);
×
NEW
523
}
×
524

525
// provides a set sign-bit in each field, remaining bits zero
526
constexpr uint64_t field_sign_bit(int width)
NEW
527
{
×
NEW
528
    return populate(width, 1ULL << (width - 1));
×
NEW
529
}
×
530

531
constexpr uint32_t inverse_width[65] = {
532
    65536 * 64 / 1, // never used
533
    65536 * 64 / 1,  65536 * 64 / 2,  65536 * 64 / 3,  65536 * 64 / 4,  65536 * 64 / 5,  65536 * 64 / 6,
534
    65536 * 64 / 7,  65536 * 64 / 8,  65536 * 64 / 9,  65536 * 64 / 10, 65536 * 64 / 11, 65536 * 64 / 12,
535
    65536 * 64 / 13, 65536 * 64 / 14, 65536 * 64 / 15, 65536 * 64 / 16, 65536 * 64 / 17, 65536 * 64 / 18,
536
    65536 * 64 / 19, 65536 * 64 / 20, 65536 * 64 / 21, 65536 * 64 / 22, 65536 * 64 / 23, 65536 * 64 / 24,
537
    65536 * 64 / 25, 65536 * 64 / 26, 65536 * 64 / 27, 65536 * 64 / 28, 65536 * 64 / 29, 65536 * 64 / 30,
538
    65536 * 64 / 31, 65536 * 64 / 32, 65536 * 64 / 33, 65536 * 64 / 34, 65536 * 64 / 35, 65536 * 64 / 36,
539
    65536 * 64 / 37, 65536 * 64 / 38, 65536 * 64 / 39, 65536 * 64 / 40, 65536 * 64 / 41, 65536 * 64 / 42,
540
    65536 * 64 / 43, 65536 * 64 / 44, 65536 * 64 / 45, 65536 * 64 / 46, 65536 * 64 / 47, 65536 * 64 / 48,
541
    65536 * 64 / 49, 65536 * 64 / 50, 65536 * 64 / 51, 65536 * 64 / 52, 65536 * 64 / 53, 65536 * 64 / 54,
542
    65536 * 64 / 55, 65536 * 64 / 56, 65536 * 64 / 57, 65536 * 64 / 58, 65536 * 64 / 59, 65536 * 64 / 60,
543
    65536 * 64 / 61, 65536 * 64 / 62, 65536 * 64 / 63, 65536 * 64 / 64,
544
};
545

546
inline size_t countr_zero(uint64_t vector)
547
{
7,007,949✔
548
    unsigned long where;
7,007,949✔
549
#if defined(_WIN64)
550
    if (_BitScanForward64(&where, vector))
551
        return static_cast<int>(where);
552
    return 0;
553
#elif defined(_WIN32)
554
    uint32_t low = vector & 0xFFFFFFFF;
555
    if (low) {
556
        bool scan_ok = _BitScanForward(&where, low);
557
        REALM_ASSERT_DEBUG(scan_ok);
558
        return where;
559
    }
560
    else {
561
        low = vector >> 32;
562
        bool scan_ok = _BitScanForward(&where, low);
563
        REALM_ASSERT_DEBUG(scan_ok);
564
        return 32 + where;
565
    }
566
#else
567
    where = __builtin_ctzll(vector);
7,007,949✔
568
    return static_cast<int>(where);
7,007,949✔
569
#endif
7,007,949✔
570
}
7,007,949✔
571

572
inline size_t first_field_marked(size_t width, uint64_t vector)
573
{
7,079,226✔
574
    const auto lz = countr_zero(vector);
7,079,226✔
575
    const auto field = (lz * inverse_width[width]) >> 22;
7,079,226✔
576
    REALM_ASSERT_DEBUG(width != 0);
7,079,226✔
577
    REALM_ASSERT_DEBUG(field == (lz / width));
7,079,226✔
578
    return field;
7,079,226✔
579
}
7,079,226✔
580

581
template <typename VectorCompare>
582
size_t parallel_subword_find(VectorCompare vector_compare, const uint64_t* data, size_t offset, uint8_t width,
583
                             uint64_t MSBs, uint64_t search_vector, size_t start, size_t end)
584
{
9,964,971✔
585
    const auto field_count = num_fields_for_width(width);
9,964,971✔
586
    const auto bit_count_pr_iteration = num_bits_for_width(width);
9,964,971✔
587
    const size_t fast_scan_limit = 4 * bit_count_pr_iteration;
9,964,971✔
588
    // use signed to make it easier to ascertain correctness of loop condition below
589
    auto total_bit_count_left = (end - start) * width;
9,964,971✔
590
    REALM_ASSERT_DEBUG(end >= start);
9,964,971✔
591
    UnalignedWordIter it(data, offset + start * width);
9,964,971✔
592
    uint64_t found_vector = 0;
9,964,971✔
593
    while (total_bit_count_left >= fast_scan_limit) {
48,972,225✔
594
        // unrolling 2x
595
        const auto word0 = it.consume_with_unsafe_prefetch(bit_count_pr_iteration);
45,847,560✔
596
        const auto word1 = it.consume_with_unsafe_prefetch(bit_count_pr_iteration);
45,847,560✔
597
        auto found_vector0 = vector_compare(MSBs, word0, search_vector);
45,847,560✔
598
        auto found_vector1 = vector_compare(MSBs, word1, search_vector);
45,847,560✔
599
        if (found_vector0) {
45,847,560!
600
            const auto sub_word_index = first_field_marked(width, found_vector0);
6,150,096✔
601
            return start + sub_word_index;
6,150,096✔
602
        }
6,150,096✔
603
        if (found_vector1) {
39,697,464!
604
            const auto sub_word_index = first_field_marked(width, found_vector1);
690,210✔
605
            return start + field_count + sub_word_index;
690,210✔
606
        }
690,210✔
607
        total_bit_count_left -= 2 * bit_count_pr_iteration;
39,007,254✔
608
        start += 2 * field_count;
39,007,254✔
609
    }
39,007,254✔
610

611
    // One word at a time
612
    while (total_bit_count_left >= bit_count_pr_iteration) {
9,994,814✔
613
        const auto word = it.consume(bit_count_pr_iteration);
7,072,452✔
614
        found_vector = vector_compare(MSBs, word, search_vector);
7,072,452✔
615
        if (found_vector) {
7,072,452✔
616
            const auto sub_word_index = first_field_marked(width, found_vector);
202,303✔
617
            return start + sub_word_index;
202,303✔
618
        }
202,303✔
619
        total_bit_count_left -= bit_count_pr_iteration;
6,870,149✔
620
        start += field_count;
6,870,149✔
621
    }
6,870,149✔
622

623
    // final subword, may be partial
624
    if (total_bit_count_left) {
3,298,256✔
625
        // limit lookahead to avoid touching memory beyond array
626
        const auto word = it.consume(total_bit_count_left);
3,264,347✔
627
        found_vector = vector_compare(MSBs, word, search_vector);
3,264,347✔
628
        auto last_word_mask = 0xFFFFFFFFFFFFFFFFULL >> (64 - total_bit_count_left);
3,264,347✔
629
        found_vector &= last_word_mask;
3,264,347✔
630
        if (found_vector) {
3,264,347✔
631
            const auto sub_word_index = first_field_marked(width, found_vector);
53,234✔
632
            return start + sub_word_index;
53,234✔
633
        }
53,234✔
634
    }
3,264,347✔
635
    return end;
2,869,128✔
636
}
2,922,362✔
637

638

639
namespace impl {
640

641
template <int width>
642
inline int64_t default_fetcher(const char* data, size_t ndx)
643
{
87,240,393✔
644
    return get_direct<width>(data, ndx);
87,240,393✔
645
}
87,240,393✔
646

647
template <typename T>
648
struct CompressedDataFetcher {
649

650
    int64_t operator()(const char*, size_t ndx) const
651
    {
320✔
652
        return ptr->get(ndx);
320✔
653
    }
320✔
654
    const T* ptr;
655
};
656

657
// Lower and Upper bound are mainly used in the B+tree implementation,
658
// but also for indexing, we can exploit these functions when the array
659
// is encoded, just providing a way for fetching the data.
660
// In this case the width is going to be ignored.
661

662
// Lower/upper bound in sorted sequence
663
// ------------------------------------
664
//
665
//   3 3 3 4 4 4 5 6 7 9 9 9
666
//   ^     ^     ^     ^     ^
667
//   |     |     |     |     |
668
//   |     |     |     |      -- Lower and upper bound of 15
669
//   |     |     |     |
670
//   |     |     |      -- Lower and upper bound of 8
671
//   |     |     |
672
//   |     |      -- Upper bound of 4
673
//   |     |
674
//   |      -- Lower bound of 4
675
//   |
676
//    -- Lower and upper bound of 1
677
//
678
// These functions are semantically identical to std::lower_bound() and
679
// std::upper_bound().
680
//
681
// We currently use binary search. See for example
682
// http://www.tbray.org/ongoing/When/200x/2003/03/22/Binary.
683
template <typename F>
684
inline size_t lower_bound(const char* data, size_t start, size_t end, int64_t value,
685
                          F fetcher = default_fetcher) noexcept
686
{
16,671,344✔
687
    // The binary search used here is carefully optimized. Key trick is to use a single
688
    // loop controlling variable (size) instead of high/low pair, and to keep updates
689
    // to size done inside the loop independent of comparisons. Further key to speed
690
    // is to avoid branching inside the loop, using conditional moves instead. This
691
    // provides robust performance for random searches, though predictable searches
692
    // might be slightly faster if we used branches instead. The loop unrolling yields
693
    // a final 5-20% speedup depending on circumstances.
694

695
    // size_t low = 0;
696
    REALM_ASSERT_DEBUG(end >= start);
16,671,344✔
697
    size_t size = end - start;
16,671,344✔
698
    // size_t low = 0;
699
    size_t low = start;
16,671,344✔
700

701
    while (size >= 8) {
21,816,931!
702
        // The following code (at X, Y and Z) is 3 times manually unrolled instances of (A) below.
703
        // These code blocks must be kept in sync. Meassurements indicate 3 times unrolling to give
704
        // the best performance. See (A) for comments on the loop body.
705
        // (X)
706
        size_t half = size / 2;
5,145,587✔
707
        size_t other_half = size - half;
5,145,587✔
708
        size_t probe = low + half;
5,145,587✔
709
        size_t other_low = low + other_half;
5,145,587✔
710
        int64_t v = fetcher(data, probe);
5,145,587✔
711
        size = half;
5,145,587✔
712
        low = (v < value) ? other_low : low;
5,145,587!
713

714
        // (Y)
715
        half = size / 2;
5,145,587✔
716
        other_half = size - half;
5,145,587✔
717
        probe = low + half;
5,145,587✔
718
        other_low = low + other_half;
5,145,587✔
719
        v = fetcher(data, probe);
5,145,587✔
720
        size = half;
5,145,587✔
721
        low = (v < value) ? other_low : low;
5,145,587!
722

723
        // (Z)
724
        half = size / 2;
5,145,587✔
725
        other_half = size - half;
5,145,587✔
726
        probe = low + half;
5,145,587✔
727
        other_low = low + other_half;
5,145,587✔
728
        v = fetcher(data, probe);
5,145,587✔
729
        size = half;
5,145,587✔
730
        low = (v < value) ? other_low : low;
5,145,587✔
731
    }
5,145,587✔
732
    while (size > 0) {
44,459,007!
733
        // (A)
734
        // To understand the idea in this code, please note that
735
        // for performance, computation of size for the next iteration
736
        // MUST be INDEPENDENT of the conditional. This allows the
737
        // processor to unroll the loop as fast as possible, and it
738
        // minimizes the length of dependence chains leading up to branches.
739
        // Making the unfolding of the loop independent of the data being
740
        // searched, also minimizes the delays incurred by branch
741
        // mispredictions, because they can be determined earlier
742
        // and the speculation corrected earlier.
743

744
        // Counterintuitive:
745
        // To make size independent of data, we cannot always split the
746
        // range at the theoretical optimal point. When we determine that
747
        // the key is larger than the probe at some index K, and prepare
748
        // to search the upper part of the range, you would normally start
749
        // the search at the next index, K+1, to get the shortest range.
750
        // We can only do this when splitting a range with odd number of entries.
751
        // If there is an even number of entries we search from K instead of K+1.
752
        // This potentially leads to redundant comparisons, but in practice we
753
        // gain more performance by making the changes to size predictable.
754

755
        // if size is even, half and other_half are the same.
756
        // if size is odd, half is one less than other_half.
757
        size_t half = size / 2;
27,787,663✔
758
        size_t other_half = size - half;
27,787,663✔
759
        size_t probe = low + half;
27,787,663✔
760
        size_t other_low = low + other_half;
27,787,663✔
761
        int64_t v = fetcher(data, probe);
27,787,663✔
762
        size = half;
27,787,663✔
763
        // for max performance, the line below should compile into a conditional
764
        // move instruction. Not all compilers do this. To maximize chance
765
        // of succes, no computation should be done in the branches of the
766
        // conditional.
767
        low = (v < value) ? other_low : low;
27,787,663✔
768
    };
27,787,663✔
769

770
    return low;
16,671,344✔
771
}
16,671,344✔
772

773
// See lower_bound()
774
template <typename F>
775
inline size_t upper_bound(const char* data, size_t start, size_t end, int64_t value,
776
                          F fetcher = default_fetcher) noexcept
777
{
5,492,966✔
778
    REALM_ASSERT_DEBUG(end >= start);
5,492,966✔
779
    size_t size = end - start;
5,492,966✔
780
    // size_t low = 0;
781
    size_t low = start;
5,492,966✔
782
    while (size >= 8) {
16,918,708✔
783
        size_t half = size / 2;
11,425,742✔
784
        size_t other_half = size - half;
11,425,742✔
785
        size_t probe = low + half;
11,425,742✔
786
        size_t other_low = low + other_half;
11,425,742✔
787
        int64_t v = fetcher(data, probe);
11,425,742✔
788
        size = half;
11,425,742✔
789
        low = (value >= v) ? other_low : low;
11,425,742✔
790

791
        half = size / 2;
11,425,742✔
792
        other_half = size - half;
11,425,742✔
793
        probe = low + half;
11,425,742✔
794
        other_low = low + other_half;
11,425,742✔
795
        v = fetcher(data, probe);
11,425,742✔
796
        size = half;
11,425,742✔
797
        low = (value >= v) ? other_low : low;
11,425,742!
798

799
        half = size / 2;
11,425,742✔
800
        other_half = size - half;
11,425,742✔
801
        probe = low + half;
11,425,742✔
802
        other_low = low + other_half;
11,425,742✔
803
        v = fetcher(data, probe);
11,425,742✔
804
        size = half;
11,425,742✔
805
        low = (value >= v) ? other_low : low;
11,425,742!
806
    }
11,425,742✔
807

808
    while (size > 0) {
15,460,455✔
809
        size_t half = size / 2;
9,967,489✔
810
        size_t other_half = size - half;
9,967,489✔
811
        size_t probe = low + half;
9,967,489✔
812
        size_t other_low = low + other_half;
9,967,489✔
813
        int64_t v = fetcher(data, probe);
9,967,489✔
814
        size = half;
9,967,489✔
815
        low = (value >= v) ? other_low : low;
9,967,489✔
816
    };
9,967,489✔
817

818
    return low;
5,492,966✔
819
}
5,492,966✔
820
} // namespace impl
821

822
template <int width>
823
inline size_t lower_bound(const char* data, size_t size, int64_t value) noexcept
824
{
16,661,430✔
825
    return impl::lower_bound(data, 0, size, value, impl::default_fetcher<width>);
16,661,430✔
826
}
16,661,430✔
827

828
template <typename T>
829
inline size_t lower_bound(const char* data, size_t size, int64_t value,
830
                          const impl::CompressedDataFetcher<T>& fetcher) noexcept
831
{
32✔
832
    return impl::lower_bound(data, 0, size, value, fetcher);
32✔
833
}
32✔
834

835
template <int width>
836
inline size_t upper_bound(const char* data, size_t size, int64_t value) noexcept
837
{
5,493,120✔
838
    return impl::upper_bound(data, 0, size, value, impl::default_fetcher<width>);
5,493,120✔
839
}
5,493,120✔
840

841
template <typename T>
842
inline size_t upper_bound(const char* data, size_t size, int64_t value,
843
                          const impl::CompressedDataFetcher<T>& fetcher) noexcept
844
{
32✔
845
    return impl::upper_bound(data, 0, size, value, fetcher);
32✔
846
}
32✔
847

848
} // namespace realm
849

850
#endif /* ARRAY_TPL_HPP_ */
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc