• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / 2093

02 Mar 2024 12:43AM UTC coverage: 90.92%. Remained the same
2093

push

Evergreen

web-flow
Use updated curl on evergreen windows hosts (#7409)

93896 of 173116 branches covered (54.24%)

238389 of 262196 relevant lines covered (90.92%)

5713411.51 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.52
/src/realm/index_string.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include <cstdio>
20
#include <iomanip>
21
#include <list>
22

23
#ifdef REALM_DEBUG
24
#include <iostream>
25
#endif
26

27
#include <realm/exceptions.hpp>
28
#include <realm/index_string.hpp>
29
#include <realm/table.hpp>
30
#include <realm/list.hpp>
31
#include <realm/timestamp.hpp>
32
#include <realm/column_integer.hpp>
33
#include <realm/unicode.hpp>
34
#include <realm/tokenizer.hpp>
35

36
using namespace realm;
37
using namespace realm::util;
38

39
namespace {
40

41
void get_child(Array& parent, size_t child_ref_ndx, Array& child) noexcept
42
{
21,544,158✔
43
    ref_type child_ref = parent.get_as_ref(child_ref_ndx);
21,544,158✔
44
    child.init_from_ref(child_ref);
21,544,158✔
45
    child.set_parent(&parent, child_ref_ndx);
21,544,158✔
46
}
21,544,158✔
47

48
// This method reconstructs the string inserted in the search index based on a string
49
// that matches so far and the last key (only works if complete strings are stored in the index)
50
static StringData reconstruct_string(size_t offset, StringIndex::key_type key, StringData new_string)
51
{
1,146✔
52
    if (key == 0)
1,146✔
53
        return StringData();
×
54

573✔
55
    size_t rest_len = 4;
1,146✔
56
    char* k = reinterpret_cast<char*>(&key);
1,146✔
57
    if (k[0] == 'X')
1,146✔
58
        rest_len = 3;
234✔
59
    else if (k[1] == 'X')
912✔
60
        rest_len = 2;
384✔
61
    else if (k[2] == 'X')
528✔
62
        rest_len = 1;
228✔
63
    else if (k[3] == 'X')
300✔
64
        rest_len = 0;
48✔
65

573✔
66
    REALM_ASSERT(offset + rest_len <= new_string.size());
1,146✔
67

573✔
68
    return StringData(new_string.data(), offset + rest_len);
1,146✔
69
}
1,146✔
70

71
template <IndexMethod method>
72
int64_t from_list_full_word(InternalFindResult&, const IntegerColumn&);
73

74
template <>
75
inline int64_t from_list_full_word<index_FindFirst>(InternalFindResult&, const IntegerColumn& key_values)
76
{
×
77
    return key_values.get(0);
×
78
}
×
79

80
template <>
81
inline int64_t from_list_full_word<index_Count>(InternalFindResult&, const IntegerColumn& key_values)
82
{
×
83
    return int64_t(key_values.size());
×
84
}
×
85

86
template <>
87
inline int64_t from_list_full_word<index_FindAll_nocopy>(InternalFindResult& result_ref,
88
                                                         const IntegerColumn& key_values)
89
{
288✔
90
    result_ref.payload = from_ref(key_values.get_ref());
288✔
91
    result_ref.start_ndx = 0;
288✔
92
    result_ref.end_ndx = key_values.size();
288✔
93
    return size_t(FindRes_column);
288✔
94
}
288✔
95

96
} // anonymous namespace
97

98
DataType ClusterColumn::get_data_type() const
99
{
×
100
    const Table* table = m_cluster_tree->get_owning_table();
×
101
    return table->get_column_type(m_column_key);
×
102
}
×
103

104
Mixed ClusterColumn::get_value(ObjKey key) const
105
{
11,313,888✔
106
    const Obj obj{m_cluster_tree->get(key)};
11,313,888✔
107
    return obj.get_any(m_column_key);
11,313,888✔
108
}
11,313,888✔
109

110
Lst<String> ClusterColumn::get_list(ObjKey key) const
111
{
6✔
112
    const Obj obj{m_cluster_tree->get(key)};
6✔
113
    return obj.get_list<String>(m_column_key);
6✔
114
}
6✔
115

116
std::vector<ObjKey> ClusterColumn::get_all_keys() const
117
{
18✔
118
    std::vector<ObjKey> ret;
18✔
119
    ret.reserve(m_cluster_tree->size());
18✔
120
    m_cluster_tree->traverse([&ret](const Cluster* cluster) {
18✔
121
        auto sz = cluster->node_size();
18✔
122
        for (size_t i = 0; i < sz; i++) {
198✔
123
            ret.push_back(cluster->get_real_key(i));
180✔
124
        }
180✔
125

9✔
126
        return IteratorControl::AdvanceToNext;
18✔
127
    });
18✔
128
    return ret;
18✔
129
}
18✔
130

131
template <>
132
int64_t IndexArray::from_list<index_FindFirst>(const Mixed& value, InternalFindResult& /* result_ref */,
133
                                               const IntegerColumn& key_values, const ClusterColumn& column) const
134
{
5,415✔
135
    SortedListComparator slc(column);
5,415✔
136

2,910✔
137
    IntegerColumn::const_iterator it_end = key_values.cend();
5,415✔
138
    IntegerColumn::const_iterator lower = slc.find_start_of_unsorted(value, key_values);
5,415✔
139

2,910✔
140
    if (lower == it_end)
5,415✔
141
        return null_key.value;
24✔
142

2,898✔
143
    int64_t first_key_value = *lower;
5,391✔
144

2,898✔
145
    Mixed actual = column.get_value(ObjKey(first_key_value));
5,391✔
146
    if (actual != value)
5,391✔
147
        return null_key.value;
×
148

2,898✔
149
    return first_key_value;
5,391✔
150
}
5,391✔
151

152
template <>
153
int64_t IndexArray::from_list<index_Count>(const Mixed& value, InternalFindResult& /* result_ref */,
154
                                           const IntegerColumn& key_values, const ClusterColumn& column) const
155
{
8,460✔
156
    SortedListComparator slc(column);
8,460✔
157

4,197✔
158
    IntegerColumn::const_iterator it_end = key_values.cend();
8,460✔
159
    IntegerColumn::const_iterator lower = slc.find_start_of_unsorted(value, key_values);
8,460✔
160
    if (lower == it_end)
8,460✔
161
        return 0;
24✔
162

4,185✔
163
    int64_t first_key_value = *lower;
8,436✔
164

4,185✔
165
    Mixed actual = column.get_value(ObjKey(first_key_value));
8,436✔
166
    if (actual != value)
8,436✔
167
        return 0;
24✔
168

4,173✔
169
    IntegerColumn::const_iterator upper = slc.find_end_of_unsorted(value, key_values, lower);
8,412✔
170
    int64_t cnt = upper - lower;
8,412✔
171

4,173✔
172
    return cnt;
8,412✔
173
}
8,412✔
174

175
template <>
176
int64_t IndexArray::from_list<index_FindAll_nocopy>(const Mixed& value, InternalFindResult& result_ref,
177
                                                    const IntegerColumn& key_values,
178
                                                    const ClusterColumn& column) const
179
{
2,505✔
180
    SortedListComparator slc(column);
2,505✔
181
    IntegerColumn::const_iterator it_end = key_values.cend();
2,505✔
182
    IntegerColumn::const_iterator lower = slc.find_start_of_unsorted(value, key_values);
2,505✔
183

1,233✔
184
    if (lower == it_end)
2,505✔
185
        return size_t(FindRes_not_found);
57✔
186

1,206✔
187
    ObjKey first_key = ObjKey(*lower);
2,448✔
188

1,206✔
189
    Mixed actual = column.get_value(ObjKey(first_key));
2,448✔
190
    if (actual != value)
2,448✔
191
        return size_t(FindRes_not_found);
57✔
192

1,176✔
193
    // Optimization: check the last entry before trying upper bound.
1,176✔
194
    IntegerColumn::const_iterator upper = it_end;
2,391✔
195
    --upper;
2,391✔
196
    // Single result if upper matches lower
1,176✔
197
    if (upper == lower) {
2,391✔
198
        result_ref.payload = *lower;
303✔
199
        return size_t(FindRes_single);
303✔
200
    }
303✔
201

1,035✔
202
    // Check string value at upper, if equal return matches in (lower, upper]
1,035✔
203
    ObjKey last_key = ObjKey(*upper);
2,088✔
204
    actual = column.get_value(ObjKey(last_key));
2,088✔
205
    if (actual == value) {
2,088✔
206
        result_ref.payload = from_ref(key_values.get_ref());
1,395✔
207
        result_ref.start_ndx = lower.get_position();
1,395✔
208
        result_ref.end_ndx = upper.get_position() + 1; // one past last match
1,395✔
209
        return size_t(FindRes_column);
1,395✔
210
    }
1,395✔
211

333✔
212
    // Last result is not equal, find the upper bound of the range of results.
333✔
213
    // Note that we are passing upper which is cend() - 1 here as we already
333✔
214
    // checked the last item manually.
333✔
215
    upper = slc.find_end_of_unsorted(value, key_values, lower);
693✔
216

333✔
217
    result_ref.payload = from_ref(key_values.get_ref());
693✔
218
    result_ref.start_ndx = lower.get_position();
693✔
219
    result_ref.end_ndx = upper.get_position();
693✔
220
    return int64_t(FindRes_column);
693✔
221
}
693✔
222

223

224
template <IndexMethod method>
225
int64_t IndexArray::index_string(const Mixed& value, InternalFindResult& result_ref,
226
                                 const ClusterColumn& column) const
227
{
1,960,017✔
228
    // Return`realm::not_found`, or an index to the (any) match
970,944✔
229
    constexpr bool first(method == index_FindFirst);
1,960,017✔
230
    // Return 0, or the number of items that match the specified `value`
970,944✔
231
    constexpr bool get_count(method == index_Count);
1,960,017✔
232
    // Same as `index_FindAll` but does not copy matching rows into `column`
970,944✔
233
    // returns FindRes_not_found if there are no matches
970,944✔
234
    // returns FindRes_single and the row index (literal) in result_ref.payload
970,944✔
235
    // or returns FindRes_column and the reference to a column of duplicates in
970,944✔
236
    // result_ref.result with the results in the bounds start_ndx, and end_ndx
970,944✔
237
    constexpr bool allnocopy(method == index_FindAll_nocopy);
1,960,017✔
238

970,944✔
239
    constexpr int64_t local_not_found = allnocopy ? int64_t(FindRes_not_found) : first ? null_key.value : 0;
1,960,017✔
240

970,944✔
241
    const char* data = m_data;
1,960,017✔
242
    const char* header;
1,960,017✔
243
    uint_least8_t width = m_width;
1,960,017✔
244
    bool is_inner_node = m_is_inner_bptree_node;
1,960,017✔
245
    typedef StringIndex::key_type key_type;
1,960,017✔
246
    size_t stringoffset = 0;
1,960,017✔
247

970,944✔
248
    StringConversionBuffer buffer;
1,960,017✔
249
    StringData index_data = value.get_index_data(buffer);
1,960,017✔
250

970,944✔
251
    // Create 4 byte index key
970,944✔
252
    key_type key = StringIndex::create_key(index_data, stringoffset);
1,960,017✔
253

970,944✔
254
    for (;;) {
2,533,488✔
255
        // Get subnode table
1,247,577✔
256
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
2,533,488✔
257

1,247,577✔
258
        // Find the position matching the key
1,247,577✔
259
        const char* offsets_header = m_alloc.translate(offsets_ref);
2,533,488✔
260
        const char* offsets_data = get_data_from_header(offsets_header);
2,533,488✔
261
        size_t offsets_size = get_size_from_header(offsets_header);
2,533,488✔
262
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
2,533,488✔
263

1,247,577✔
264
        // If key is outside range, we know there can be no match
1,247,577✔
265
        if (pos == offsets_size)
2,533,488✔
266
            return local_not_found;
841,791✔
267

829,185✔
268
        // Get entry under key
829,185✔
269
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
1,691,697✔
270
        uint64_t ref = get_direct(data, width, pos_refs);
1,691,697✔
271

829,185✔
272
        if (is_inner_node) {
1,691,697✔
273
            // Set vars for next iteration
53,547✔
274
            header = m_alloc.translate(to_ref(ref));
116,049✔
275
            data = get_data_from_header(header);
116,049✔
276
            width = get_width_from_header(header);
116,049✔
277
            is_inner_node = get_is_inner_bptree_node_from_header(header);
116,049✔
278
            continue;
116,049✔
279
        }
116,049✔
280

775,638✔
281
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
1,575,648✔
282

775,638✔
283
        if (stored_key != key)
1,575,648✔
284
            return local_not_found;
241,614✔
285

663,441✔
286
        // Literal row index (tagged)
663,441✔
287
        if (ref & 1) {
1,334,034✔
288
            int64_t key_value = int64_t(ref >> 1);
860,076✔
289

432,000✔
290
            Mixed a = column.full_word() ? reconstruct_string(stringoffset, key, index_data)
432,084✔
291
                                         : column.get_value(ObjKey(key_value));
859,992✔
292
            if (a == value) {
860,076✔
293
                result_ref.payload = key_value;
857,139✔
294
                return first ? key_value : get_count ? 1 : FindRes_single;
848,787✔
295
            }
857,139✔
296
            return local_not_found;
2,937✔
297
        }
2,937✔
298

231,441✔
299
        const char* sub_header = m_alloc.translate(ref_type(ref));
473,958✔
300
        const bool sub_isindex = get_context_flag_from_header(sub_header);
473,958✔
301

231,441✔
302
        // List of row indices with common prefix up to this point, in sorted order.
231,441✔
303
        if (!sub_isindex) {
473,958✔
304
            const IntegerColumn sub(m_alloc, ref_type(ref));
16,668✔
305
            if (column.full_word()) {
16,668✔
306
                return from_list_full_word<method>(result_ref, sub);
288✔
307
            }
288✔
308

8,340✔
309
            return from_list<method>(value, result_ref, sub, column);
16,380✔
310
        }
16,380✔
311

222,957✔
312
        // Recurse into sub-index;
222,957✔
313
        header = sub_header;
457,290✔
314
        data = get_data_from_header(header);
457,290✔
315
        width = get_width_from_header(header);
457,290✔
316
        is_inner_node = get_is_inner_bptree_node_from_header(header);
457,290✔
317

222,957✔
318
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
222,957✔
319
        stringoffset += 4;
457,290✔
320

222,957✔
321
        // Update 4 byte index key
222,957✔
322
        key = StringIndex::create_key(index_data, stringoffset);
457,290✔
323
    }
457,290✔
324
}
1,960,017✔
325

326

327
void IndexArray::from_list_all_ins(StringData upper_value, std::vector<ObjKey>& result, const IntegerColumn& rows,
328
                                   const ClusterColumn& column) const
329
{
1,422✔
330
    // optimization for the most common case, where all the strings under a given subindex are equal
711✔
331
    Mixed first = column.get_value(ObjKey(*rows.cbegin()));
1,422✔
332
    Mixed last = column.get_value(ObjKey(*(rows.cend() - 1)));
1,422✔
333
    if (first == last) {
1,422✔
334
        if (!first.is_type(type_String))
102✔
335
            return;
×
336
        auto first_str_upper = case_map(first.get_string(), true);
102✔
337
        if (first_str_upper != upper_value) {
102✔
338
            return;
24✔
339
        }
24✔
340

39✔
341
        size_t sz = result.size() + rows.size();
78✔
342
        result.reserve(sz);
78✔
343
        for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
246✔
344
            result.push_back(ObjKey(*it));
168✔
345
        }
168✔
346
        return;
78✔
347
    }
78✔
348

660✔
349
    // special case for very long strings, where they might have a common prefix and end up in the
660✔
350
    // same subindex column, but still not be identical
660✔
351
    for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
66,792✔
352
        ObjKey key = ObjKey(*it);
65,472✔
353
        Mixed val = column.get_value(key);
65,472✔
354
        if (val.is_type(type_String)) {
65,472✔
355
            auto upper_str = case_map(val.get_string(), true);
65,472✔
356
            if (upper_str == upper_value) {
65,472✔
357
                result.push_back(key);
1,440✔
358
            }
1,440✔
359
        }
65,472✔
360
    }
65,472✔
361

660✔
362
    return;
1,320✔
363
}
1,320✔
364

365

366
void IndexArray::from_list_all(const Mixed& value, std::vector<ObjKey>& result, const IntegerColumn& rows,
367
                               const ClusterColumn& column) const
368
{
39,021✔
369
    if (column.full_word()) {
39,021✔
370
        result.reserve(rows.size());
60✔
371
        for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
186✔
372
            result.push_back(ObjKey(*it));
126✔
373
        }
126✔
374

30✔
375
        return;
60✔
376
    }
60✔
377

19,812✔
378
    SortedListComparator slc(column);
38,961✔
379

19,812✔
380
    IntegerColumn::const_iterator it_end = rows.cend();
38,961✔
381
    IntegerColumn::const_iterator lower = slc.find_start_of_unsorted(value, rows);
38,961✔
382
    if (lower == it_end)
38,961✔
383
        return;
×
384

19,812✔
385
    ObjKey key = ObjKey(*lower);
38,961✔
386

19,812✔
387
    Mixed a = column.get_value(key);
38,961✔
388
    if (a != value)
38,961✔
389
        return;
×
390

19,812✔
391
    IntegerColumn::const_iterator upper = slc.find_end_of_unsorted(value, rows, lower);
38,961✔
392

19,812✔
393
    // Copy all matches into result column
19,812✔
394
    size_t sz = result.size() + (upper - lower);
38,961✔
395
    result.reserve(sz);
38,961✔
396
    for (IntegerColumn::const_iterator it = lower; it != upper; ++it) {
723,225✔
397
        result.push_back(ObjKey(*it));
684,264✔
398
    }
684,264✔
399
}
38,961✔
400

401

402
namespace {
403

404
// Helper functions for SearchList (index_string_all_ins) for generating permutations of index keys
405

406
// replicates the 4 least significant bits each times 8
407
// eg: abcd -> aaaaaaaabbbbbbbbccccccccdddddddd
408
uint32_t replicate_4_lsb_x8(uint32_t i)
409
{
3,121,365✔
410
    REALM_ASSERT_DEBUG(i <= 15);
3,121,365✔
411
    i *= 0x204081;
3,121,365✔
412
    i &= 0x1010101;
3,121,365✔
413
    i *= 0xff;
3,121,365✔
414
    return i;
3,121,365✔
415
}
3,121,365✔
416

417
int32_t select_from_mask(int32_t a, int32_t b, int32_t mask) {
3,120,750✔
418
    return a ^ ((a ^ b) & mask);
3,120,750✔
419
}
3,120,750✔
420

421
// Given upper and lower keys: "ABCD" and "abcd", the 4 LSBs in the permutation argument determine the
422
// final key:
423
// Permutation 0  = "ABCD"
424
// Permutation 1  = "ABCd"
425
// Permutation 8  = "aBCD"
426
// Permutation 15 = "abcd"
427
using key_type = StringIndex::key_type;
428
static key_type generate_key(key_type upper, key_type lower, int permutation)
429
{
3,121,353✔
430
    return select_from_mask(upper, lower, replicate_4_lsb_x8(permutation));
3,121,353✔
431
}
3,121,353✔
432

433

434
// Helper structure for IndexArray::index_string_all_ins to generate and keep track of search key permutations,
435
// when traversing the trees.
436
struct SearchList {
437
    struct Item {
438
        const char* header;
439
        size_t string_offset;
440
        key_type key;
441
    };
442

443
    SearchList(const util::Optional<std::string>& upper_value, const util::Optional<std::string>& lower_value)
444
        : m_upper_value(upper_value)
445
        , m_lower_value(lower_value)
446
    {
6,582✔
447
        m_keys_seen.reserve(num_permutations);
6,582✔
448
    }
6,582✔
449

450
    // Add all unique keys for this level to the internal work stack
451
    void add_all_for_level(const char* header, size_t string_offset)
452
    {
195,186✔
453
        m_keys_seen.clear();
195,186✔
454
        const key_type upper_key = StringIndex::create_key(m_upper_value, string_offset);
195,186✔
455
        const key_type lower_key = StringIndex::create_key(m_lower_value, string_offset);
195,186✔
456
        for (int p = 0; p < num_permutations; ++p) {
3,316,674✔
457
            // FIXME: This might still be incorrect due to multi-byte unicode characters (crossing the 4 byte key
1,559,616✔
458
            // size) being combined incorrectly.
1,559,616✔
459
            const key_type key = generate_key(upper_key, lower_key, p);
3,121,488✔
460
            const bool new_key = std::find(m_keys_seen.cbegin(), m_keys_seen.cend(), key) == m_keys_seen.cend();
3,121,488✔
461
            if (new_key) {
3,121,488✔
462
                m_keys_seen.push_back(key);
3,056,925✔
463
                add_next(header, string_offset, key);
3,056,925✔
464
            }
3,056,925✔
465
        }
3,121,488✔
466
    }
195,186✔
467

468
    bool empty() const
469
    {
3,056,076✔
470
        return m_items.empty();
3,056,076✔
471
    }
3,056,076✔
472

473
    Item get_next()
474
    {
3,053,931✔
475
        Item item = m_items.back();
3,053,931✔
476
        m_items.pop_back();
3,053,931✔
477
        return item;
3,053,931✔
478
    }
3,053,931✔
479

480
    // Add a single entry to the internal work stack. Used to traverse the inner trees (same key)
481
    void add_next(const char* header, size_t string_offset, key_type key)
482
    {
3,055,161✔
483
        m_items.push_back({header, string_offset, key});
3,055,161✔
484
    }
3,055,161✔
485

486
private:
487
    static constexpr int num_permutations = 1 << sizeof(key_type); // 4 bytes gives up to 16 search keys
488

489
    std::vector<Item> m_items;
490

491
    const util::Optional<std::string> m_upper_value;
492
    const util::Optional<std::string> m_lower_value;
493

494
    std::vector<key_type> m_keys_seen;
495
};
496

497

498
} // namespace
499

500

501
void IndexArray::index_string_all_ins(StringData value, std::vector<ObjKey>& result,
502
                                      const ClusterColumn& column) const
503
{
6,582✔
504
    REALM_ASSERT(!value.is_null());
6,582✔
505

3,291✔
506
    const util::Optional<std::string> upper_value = case_map(value, true);
6,582✔
507
    const util::Optional<std::string> lower_value = case_map(value, false);
6,582✔
508
    SearchList search_list(upper_value, lower_value);
6,582✔
509

3,291✔
510
    const char* top_header = get_header_from_data(m_data);
6,582✔
511
    search_list.add_all_for_level(top_header, 0);
6,582✔
512

3,291✔
513
    while (!search_list.empty()) {
3,060,861✔
514
        SearchList::Item item = search_list.get_next();
3,054,279✔
515

1,524,426✔
516
        const char* const header = item.header;
3,054,279✔
517
        const size_t string_offset = item.string_offset;
3,054,279✔
518
        const key_type key = item.key;
3,054,279✔
519
        const char* const data = get_data_from_header(header);
3,054,279✔
520
        const uint_least8_t width = get_width_from_header(header);
3,054,279✔
521
        const bool is_inner_node = get_is_inner_bptree_node_from_header(header);
3,054,279✔
522

1,524,426✔
523
        // Get subnode table
1,524,426✔
524
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
3,054,279✔
525

1,524,426✔
526
        // Find the position matching the key
1,524,426✔
527
        const char* const offsets_header = m_alloc.translate(offsets_ref);
3,054,279✔
528
        const char* const offsets_data = get_data_from_header(offsets_header);
3,054,279✔
529
        const size_t offsets_size = get_size_from_header(offsets_header);
3,054,279✔
530
        const size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
3,054,279✔
531

1,524,426✔
532
        // If key is outside range, we know there can be no match
1,524,426✔
533
        if (pos == offsets_size)
3,054,279✔
534
            continue;
858✔
535

1,523,997✔
536
        // Get entry under key
1,523,997✔
537
        const size_t pos_refs = pos + 1; // first entry in refs points to offsets
3,053,421✔
538
        const uint64_t ref = get_direct(data, width, pos_refs);
3,053,421✔
539

1,523,997✔
540
        if (is_inner_node) {
3,053,421✔
541
            // Set vars for next iteration
542
            const char* const inner_header = m_alloc.translate(to_ref(ref));
×
543
            search_list.add_next(inner_header, string_offset, key);
×
544
            continue;
×
545
        }
×
546

1,523,997✔
547
        const key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
3,053,421✔
548

1,523,997✔
549
        if (stored_key != key)
3,053,421✔
550
            continue;
2,853,147✔
551

102,414✔
552
        // Literal row index (tagged)
102,414✔
553
        if (ref & 1) {
200,274✔
554
            ObjKey k(int64_t(ref >> 1));
5,646✔
555

2,823✔
556
            // The buffer is needed when for when this is an integer index.
2,823✔
557
            StringConversionBuffer buffer;
5,646✔
558
            const StringData str = column.get_value(k).get_index_data(buffer);
5,646✔
559
            const util::Optional<std::string> upper_str = case_map(str, true);
5,646✔
560
            if (upper_str == upper_value) {
5,646✔
561
                result.push_back(k);
5,598✔
562
            }
5,598✔
563
            continue;
5,646✔
564
        }
5,646✔
565

99,591✔
566
        const char* const sub_header = m_alloc.translate(ref_type(ref));
194,628✔
567
        const bool sub_isindex = get_context_flag_from_header(sub_header);
194,628✔
568

99,591✔
569
        // List of row indices with common prefix up to this point, in sorted order.
99,591✔
570
        if (!sub_isindex) {
194,628✔
571
            const IntegerColumn sub(m_alloc, ref_type(ref));
1,422✔
572
            from_list_all_ins(upper_value, result, sub, column);
1,422✔
573
            continue;
1,422✔
574
        }
1,422✔
575

98,880✔
576
        // Recurse into sub-index;
98,880✔
577
        search_list.add_all_for_level(sub_header, string_offset + 4);
193,206✔
578
    }
193,206✔
579

3,291✔
580
    // sort the result and return a std::vector
3,291✔
581
    std::sort(result.begin(), result.end());
6,582✔
582
}
6,582✔
583

584

585
void IndexArray::index_string_all(const Mixed& value, std::vector<ObjKey>& result, const ClusterColumn& column) const
586
{
47,742✔
587
    const char* data = m_data;
47,742✔
588
    const char* header;
47,742✔
589
    uint_least8_t width = m_width;
47,742✔
590
    bool is_inner_node = m_is_inner_bptree_node;
47,742✔
591
    size_t stringoffset = 0;
47,742✔
592

24,135✔
593
    StringConversionBuffer buffer;
47,742✔
594
    StringData index_data = value.get_index_data(buffer);
47,742✔
595
    // Create 4 byte index key
24,135✔
596
    key_type key = StringIndex::create_key(index_data, stringoffset);
47,742✔
597

24,135✔
598
    for (;;) {
79,383✔
599
        // Get subnode table
39,081✔
600
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
79,383✔
601

39,081✔
602
        // Find the position matching the key
39,081✔
603
        const char* offsets_header = m_alloc.translate(offsets_ref);
79,383✔
604
        const char* offsets_data = get_data_from_header(offsets_header);
79,383✔
605
        size_t offsets_size = get_size_from_header(offsets_header);
79,383✔
606
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
79,383✔
607

39,081✔
608
        // If key is outside range, we know there can be no match
39,081✔
609
        if (pos == offsets_size)
79,383✔
610
            return;
12✔
611

39,075✔
612
        // Get entry under key
39,075✔
613
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
79,371✔
614
        uint64_t ref = get_direct(data, width, pos_refs);
79,371✔
615

39,075✔
616
        if (is_inner_node) {
79,371✔
617
            // Set vars for next iteration
3✔
618
            header = m_alloc.translate(ref_type(ref));
6✔
619
            data = get_data_from_header(header);
6✔
620
            width = get_width_from_header(header);
6✔
621
            is_inner_node = get_is_inner_bptree_node_from_header(header);
6✔
622
            continue;
6✔
623
        }
6✔
624

39,072✔
625
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
79,365✔
626

39,072✔
627
        if (stored_key != key)
79,365✔
628
            return;
6,126✔
629

36,009✔
630
        // Literal row index (tagged)
36,009✔
631
        if (ref & 1) {
73,239✔
632
            ObjKey k(int64_t(ref >> 1));
2,583✔
633

1,224✔
634
            if (column.full_word() || column.get_value(k) == value) {
2,583✔
635
                result.push_back(k);
2,583✔
636
                return;
2,583✔
637
            }
2,583✔
638
            return;
×
639
        }
×
640

34,785✔
641
        const char* sub_header = m_alloc.translate(ref_type(ref));
70,656✔
642
        const bool sub_isindex = get_context_flag_from_header(sub_header);
70,656✔
643

34,785✔
644
        // List of row indices with common prefix up to this point, in sorted order.
34,785✔
645
        if (!sub_isindex) {
70,656✔
646
            const IntegerColumn sub(m_alloc, ref_type(ref));
39,021✔
647
            return from_list_all(value, result, sub, column);
39,021✔
648
        }
39,021✔
649

14,943✔
650
        // Recurse into sub-index;
14,943✔
651
        header = sub_header;
31,635✔
652
        data = get_data_from_header(header);
31,635✔
653
        width = get_width_from_header(header);
31,635✔
654
        is_inner_node = get_is_inner_bptree_node_from_header(header);
31,635✔
655

14,943✔
656
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
14,943✔
657
        stringoffset += 4;
31,635✔
658

14,943✔
659
        // Update 4 byte index key
14,943✔
660
        key = StringIndex::create_key(index_data, stringoffset);
31,635✔
661
    }
31,635✔
662
}
47,742✔
663

664
static void get_all_keys_below(std::set<int64_t>& result, ref_type ref, Allocator& alloc)
665
{
72✔
666
    const char* sub_header = alloc.translate(ref_type(ref));
72✔
667
    const bool sub_isindex = NodeHeader::get_context_flag_from_header(sub_header);
72✔
668

36✔
669
    if (sub_isindex) {
72✔
670
        Array tree(alloc);
36✔
671
        tree.init_from_ref(ref);
36✔
672
        auto sz = tree.size();
36✔
673
        for (size_t n = 1; n < sz; n++) {
90✔
674
            auto rot = tree.get_as_ref_or_tagged(n);
54✔
675
            // Literal row index (tagged)
27✔
676
            if (rot.is_tagged()) {
54✔
677
                result.insert(rot.get_as_int());
30✔
678
            }
30✔
679
            else {
24✔
680
                get_all_keys_below(result, rot.get_as_ref(), alloc);
24✔
681
            }
24✔
682
        }
54✔
683
    }
36✔
684
    else {
36✔
685
        IntegerColumn tree(alloc, ref);
36✔
686
        tree.for_all([&result](int64_t i) {
84✔
687
            result.insert(i);
84✔
688
        });
84✔
689
    }
36✔
690
}
72✔
691

692
void IndexArray::_index_string_find_all_prefix(std::set<int64_t>& result, StringData str, const char* header) const
693
{
66✔
694
    size_t stringoffset = 0;
66✔
695

33✔
696
    for (;;) {
90✔
697
        const char* data = NodeHeader::get_data_from_header(header);
90✔
698
        uint_least8_t width = get_width_from_header(header);
90✔
699

45✔
700
        // Create 4 byte lower and upper key
45✔
701
        key_type lower = 0;
90✔
702
        size_t i = 0;
90✔
703
        size_t n = str.size() - stringoffset;
90✔
704
        bool is_at_string_end = (n <= 4);
90✔
705
        if (!is_at_string_end) {
90✔
706
            n = 4;
30✔
707
        }
30✔
708
        while (i < n) {
360✔
709
            lower <<= 8;
270✔
710
            lower += str[stringoffset + i++];
270✔
711
        }
270✔
712
        size_t shift = (4 - i) * 8;
90✔
713
        key_type upper = lower + 1;
90✔
714
        lower <<= shift;
90✔
715
        upper <<= shift;
90✔
716
        upper--;
90✔
717

45✔
718
        // Get index array
45✔
719
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
90✔
720

45✔
721
        // Find the position matching the key
45✔
722
        const char* offsets_header = m_alloc.translate(offsets_ref);
90✔
723
        const char* offsets_data = get_data_from_header(offsets_header);
90✔
724
        size_t offsets_size = get_size_from_header(offsets_header);
90✔
725
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, lower); // keys are always 32 bits wide
90✔
726
        // If key is outside range, we know there can be no match
45✔
727
        if (pos == offsets_size)
90✔
728
            return;
×
729

45✔
730
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
90✔
731

45✔
732
        if (NodeHeader::get_is_inner_bptree_node_from_header(header)) {
90✔
733
            bool done = false;
×
734
            while (!done) {
×
735
                // Recursively call with child node
736
                const char* header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs++)));
×
737
                _index_string_find_all_prefix(result, str, header);
×
738

739
                // Check if current node is past end of key range or last node
740
                auto key = get_direct<32>(offsets_data, pos++);
×
741
                done = key > upper || pos == offsets_size;
×
742
            }
×
743
            return;
×
744
        }
×
745

45✔
746
        size_t end = ::upper_bound<32>(offsets_data, offsets_size, upper); // keys are always 32 bits wide
90✔
747

45✔
748
        if (pos == end) {
90✔
749
            // No match
9✔
750
            return;
18✔
751
        }
18✔
752

36✔
753
        if (is_at_string_end) {
72✔
754
            // now get all entries from start to end
24✔
755
            for (size_t ndx = pos_refs; ndx <= end; ndx++) {
108✔
756
                uint64_t ref = get_direct(data, width, ndx);
60✔
757
                // Literal row index (tagged)
30✔
758
                if (ref & 1) {
60✔
759
                    result.emplace(int64_t(ref >> 1));
12✔
760
                }
12✔
761
                else {
48✔
762
                    get_all_keys_below(result, to_ref(ref), m_alloc);
48✔
763
                }
48✔
764
            }
60✔
765
            return;
48✔
766
        }
48✔
767

12✔
768
        // When we are not at end of string then we are comparing against the whole key
12✔
769
        // and we can have at most one match
12✔
770
        REALM_ASSERT(end == pos + 1);
24✔
771
        header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs)));
24✔
772
        stringoffset += 4;
24✔
773
    }
24✔
774
}
66✔
775

776
ObjKey IndexArray::index_string_find_first(const Mixed& value, const ClusterColumn& column) const
777
{
1,289,424✔
778
    InternalFindResult unused;
1,289,424✔
779
    return ObjKey(index_string<index_FindFirst>(value, unused, column));
1,289,424✔
780
}
1,289,424✔
781

782

783
void IndexArray::index_string_find_all(std::vector<ObjKey>& result, const Mixed& value, const ClusterColumn& column,
784
                                       bool case_insensitive) const
785
{
54,324✔
786
    if (case_insensitive && value.is_type(type_String)) {
54,324✔
787
        index_string_all_ins(value.get_string(), result, column);
6,582✔
788
    }
6,582✔
789
    else {
47,742✔
790
        index_string_all(value, result, column);
47,742✔
791
    }
47,742✔
792
}
54,324✔
793

794
FindRes IndexArray::index_string_find_all_no_copy(const Mixed& value, const ClusterColumn& column,
795
                                                  InternalFindResult& result) const
796
{
657,702✔
797
    return static_cast<FindRes>(index_string<index_FindAll_nocopy>(value, result, column));
657,702✔
798
}
657,702✔
799

800
size_t IndexArray::index_string_count(const Mixed& value, const ClusterColumn& column) const
801
{
12,864✔
802
    InternalFindResult unused;
12,864✔
803
    return to_size_t(index_string<index_Count>(value, unused, column));
12,864✔
804
}
12,864✔
805

806
std::unique_ptr<IndexArray> StringIndex::create_node(Allocator& alloc, bool is_leaf)
807
{
682,104✔
808
    Array::Type type = is_leaf ? Array::type_HasRefs : Array::type_InnerBptreeNode;
682,059✔
809
    std::unique_ptr<IndexArray> top(new IndexArray(alloc)); // Throws
682,104✔
810
    top->create(type);                                      // Throws
682,104✔
811

340,605✔
812
    // Mark that this is part of index
340,605✔
813
    // (as opposed to columns under leaves)
340,605✔
814
    top->set_context_flag(true);
682,104✔
815

340,605✔
816
    // Add subcolumns for leaves
340,605✔
817
    Array values(alloc);
682,104✔
818
    values.create(Array::type_Normal);       // Throws
682,104✔
819
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
682,104✔
820
    top->add(values.get_ref());              // first entry in refs points to offsets
682,104✔
821

340,605✔
822
    return top;
682,104✔
823
}
682,104✔
824

825
StringIndex::key_type StringIndex::get_last_key() const
826
{
347,562✔
827
    Array offsets(m_array->get_alloc());
347,562✔
828
    get_child(*m_array, 0, offsets);
347,562✔
829
    return key_type(offsets.back());
347,562✔
830
}
347,562✔
831

832

833
void StringIndex::insert_with_offset(ObjKey obj_key, StringData index_data, const Mixed& value, size_t offset)
834
{
9,886,740✔
835
    // Create 4 byte index key
4,920,510✔
836
    key_type key = create_key(index_data, offset);
9,886,740✔
837
    TreeInsert(obj_key, key, offset, index_data, value); // Throws
9,886,740✔
838
}
9,886,740✔
839

840
void StringIndex::insert_to_existing_list_at_lower(ObjKey key, Mixed value, IntegerColumn& list,
841
                                                   const IntegerColumnIterator& lower)
842
{
2,072,391✔
843
    // At this point there exists duplicates of this value, we need to
1,034,694✔
844
    // insert value beside it's duplicates so that rows are also sorted
1,034,694✔
845
    // in ascending order.
1,034,694✔
846
    IntegerColumn::const_iterator upper = [&]() {
2,072,394✔
847
        if (m_target_column.full_word()) {
2,072,364✔
848
            return list.cend();
354✔
849
        }
354✔
850
        else {
2,072,010✔
851
            SortedListComparator slc(m_target_column);
2,072,010✔
852
            return slc.find_end_of_unsorted(value, list, lower);
2,072,010✔
853
        }
2,072,010✔
854
    }();
2,072,364✔
855
    // find insert position (the list has to be kept in sorted order)
1,034,694✔
856
    // In most cases the refs will be added to the end. So we test for that
1,034,694✔
857
    // first to see if we can avoid the binary search for insert position
1,034,694✔
858
    IntegerColumn::const_iterator last = upper - ptrdiff_t(1);
2,072,391✔
859
    int64_t last_key_value = *last;
2,072,391✔
860
    if (key.value >= last_key_value) {
2,072,391✔
861
        list.insert(upper.get_position(), key.value);
2,036,688✔
862
    }
2,036,688✔
863
    else {
35,703✔
864
        // insert into the group of duplicates, keeping object keys sorted
17,634✔
865
        IntegerColumn::const_iterator inner_lower = std::lower_bound(lower, upper, key.value);
35,703✔
866
        if (*inner_lower != key.value) {
35,703✔
867
            list.insert(inner_lower.get_position(), key.value);
35,415✔
868
        }
35,415✔
869
    }
35,703✔
870
}
2,072,391✔
871

872
void StringIndex::insert_to_existing_list(ObjKey key, Mixed value, IntegerColumn& list)
873
{
1,923✔
874
    SortedListComparator slc(m_target_column);
1,923✔
875
    IntegerColumn::const_iterator it_end = list.cend();
1,923✔
876
    IntegerColumn::const_iterator lower = slc.find_start_of_unsorted(value, list);
1,923✔
877

948✔
878
    if (lower == it_end) {
1,923✔
879
        // Not found and everything is less, just append it to the end.
759✔
880
        list.add(key.value);
1,530✔
881
    }
1,530✔
882
    else {
393✔
883
        ObjKey lower_key = ObjKey(*lower);
393✔
884
        Mixed lower_value = get(lower_key);
393✔
885

189✔
886
        if (lower_value != value) {
393✔
887
            list.insert(lower.get_position(), key.value);
393✔
888
        }
393✔
889
        else {
×
890
            // At this point there exists duplicates of this value, we need to
891
            // insert value beside it's duplicates so that rows are also sorted
892
            // in ascending order.
893
            insert_to_existing_list_at_lower(key, value, list, lower);
×
894
        }
×
895
    }
393✔
896
}
1,923✔
897

898

899
void StringIndex::insert_row_list(size_t ref, size_t offset, StringData index_data)
900
{
2,307✔
901
    REALM_ASSERT(!m_array->is_inner_bptree_node()); // only works in leaves
2,307✔
902

1,203✔
903
    // Create 4 byte index key
1,203✔
904
    key_type key = create_key(index_data, offset);
2,307✔
905

1,203✔
906
    // Get subnode table
1,203✔
907
    Allocator& alloc = m_array->get_alloc();
2,307✔
908
    Array values(alloc);
2,307✔
909
    get_child(*m_array, 0, values);
2,307✔
910
    REALM_ASSERT(m_array->size() == values.size() + 1);
2,307✔
911

1,203✔
912
    size_t ins_pos = values.lower_bound_int(key);
2,307✔
913
    if (ins_pos == values.size()) {
2,307✔
914
        // When key is outside current range, we can just add it
1,203✔
915
        values.add(key);
2,307✔
916
        m_array->add(ref);
2,307✔
917
        return;
2,307✔
918
    }
2,307✔
919

920
#ifdef REALM_DEBUG // LCOV_EXCL_START ignore debug code
×
921
    // Since we only use this for moving existing values to new
922
    // subindexes, there should never be an existing match.
923
    key_type k = key_type(values.get(ins_pos));
×
924
    REALM_ASSERT(k != key);
×
925
#endif // LCOV_EXCL_STOP ignore debug code
×
926

927
    // If key is not present we add it at the correct location
928
    values.insert(ins_pos, key);
×
929
    m_array->insert(ins_pos + 1, ref);
×
930
}
×
931

932

933
void StringIndex::TreeInsert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value)
934
{
9,886,689✔
935
    NodeChange nc = do_insert(obj_key, key, offset, index_data, value);
9,886,689✔
936
    switch (nc.type) {
9,886,689✔
937
        case NodeChange::change_None:
9,884,013✔
938
            return;
9,884,013✔
939
        case NodeChange::change_InsertBefore: {
✔
940
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
×
941
            new_node.node_add_key(nc.ref1);
×
942
            new_node.node_add_key(get_ref());
×
943
            m_array->init_from_ref(new_node.get_ref());
×
944
            m_array->update_parent();
×
945
            return;
×
946
        }
×
947
        case NodeChange::change_InsertAfter: {
6✔
948
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
6✔
949
            new_node.node_add_key(get_ref());
6✔
950
            new_node.node_add_key(nc.ref1);
6✔
951
            m_array->init_from_ref(new_node.get_ref());
6✔
952
            m_array->update_parent();
6✔
953
            return;
6✔
954
        }
×
955
        case NodeChange::change_Split: {
75✔
956
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
75✔
957
            new_node.node_add_key(nc.ref1);
75✔
958
            new_node.node_add_key(nc.ref2);
75✔
959
            m_array->init_from_ref(new_node.get_ref());
75✔
960
            m_array->update_parent();
75✔
961
            return;
75✔
962
        }
×
963
    }
×
964
    REALM_ASSERT(false); // LCOV_EXCL_LINE; internal Realm error
965
}
×
966

967

968
StringIndex::NodeChange StringIndex::do_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data,
969
                                               const Mixed& value)
970
{
10,171,047✔
971
    Allocator& alloc = m_array->get_alloc();
10,171,047✔
972
    if (m_array->is_inner_bptree_node()) {
10,171,047✔
973
        // Get subnode table
136,761✔
974
        Array keys(alloc);
282,465✔
975
        get_child(*m_array, 0, keys);
282,465✔
976
        REALM_ASSERT(m_array->size() == keys.size() + 1);
282,465✔
977

136,761✔
978
        // Find the subnode containing the item
136,761✔
979
        size_t node_ndx = keys.lower_bound_int(key);
282,465✔
980
        if (node_ndx == keys.size()) {
282,465✔
981
            // node can never be empty, so try to fit in last item
9,417✔
982
            node_ndx = keys.size() - 1;
18,879✔
983
        }
18,879✔
984

136,761✔
985
        // Get sublist
136,761✔
986
        size_t refs_ndx = node_ndx + 1; // first entry in refs points to offsets
282,465✔
987
        ref_type ref = m_array->get_as_ref(refs_ndx);
282,465✔
988
        StringIndex target(ref, m_array.get(), refs_ndx, m_target_column, alloc);
282,465✔
989

136,761✔
990
        // Insert item
136,761✔
991
        NodeChange nc = target.do_insert(obj_key, key, offset, index_data, value);
282,465✔
992
        if (nc.type == NodeChange::change_None) {
282,465✔
993
            // update keys
136,521✔
994
            key_type last_key = target.get_last_key();
281,973✔
995
            keys.set(node_ndx, last_key);
281,973✔
996
            return NodeChange::change_None; // no new nodes
281,973✔
997
        }
281,973✔
998

240✔
999
        if (nc.type == NodeChange::change_InsertAfter) {
492✔
1000
            ++node_ndx;
21✔
1001
            ++refs_ndx;
21✔
1002
        }
21✔
1003

240✔
1004
        // If there is room, just update node directly
240✔
1005
        if (keys.size() < REALM_MAX_BPNODE_SIZE) {
492✔
1006
            if (nc.type == NodeChange::change_Split) {
492✔
1007
                node_insert_split(node_ndx, nc.ref2);
471✔
1008
            }
471✔
1009
            else {
21✔
1010
                node_insert(node_ndx, nc.ref1); // ::INSERT_BEFORE/AFTER
21✔
1011
            }
21✔
1012
            return NodeChange::change_None;
492✔
1013
        }
492✔
1014

1015
        // Else create new node
1016
        StringIndex new_node(inner_node_tag(), alloc);
×
1017
        if (nc.type == NodeChange::change_Split) {
×
1018
            // update offset for left node
1019
            key_type last_key = target.get_last_key();
×
1020
            keys.set(node_ndx, last_key);
×
1021

1022
            new_node.node_add_key(nc.ref2);
×
1023
            ++node_ndx;
×
1024
            ++refs_ndx;
×
1025
        }
×
1026
        else {
×
1027
            new_node.node_add_key(nc.ref1);
×
1028
        }
×
1029

1030
        switch (node_ndx) {
×
1031
            case 0: // insert before
×
1032
                return NodeChange(NodeChange::change_InsertBefore, new_node.get_ref());
×
1033
            case REALM_MAX_BPNODE_SIZE: // insert after
×
1034
                if (nc.type == NodeChange::change_Split)
×
1035
                    return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
×
1036
                return NodeChange(NodeChange::change_InsertAfter, new_node.get_ref());
×
1037
            default: // split
×
1038
                // Move items after split to new node
1039
                size_t len = m_array->size();
×
1040
                for (size_t i = refs_ndx; i < len; ++i) {
×
1041
                    ref_type ref_i = m_array->get_as_ref(i);
×
1042
                    new_node.node_add_key(ref_i);
×
1043
                }
×
1044
                keys.truncate(node_ndx);
×
1045
                m_array->truncate(refs_ndx);
×
1046
                return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
×
1047
        }
9,888,582✔
1048
    }
9,888,582✔
1049
    else {
9,888,582✔
1050
        // Is there room in the list?
4,924,359✔
1051
        Array old_keys(alloc);
9,888,582✔
1052
        get_child(*m_array, 0, old_keys);
9,888,582✔
1053
        const size_t old_offsets_size = old_keys.size();
9,888,582✔
1054
        REALM_ASSERT_EX(m_array->size() == old_offsets_size + 1, m_array->size(), old_offsets_size + 1);
9,888,582✔
1055

4,924,359✔
1056
        bool noextend = old_offsets_size >= REALM_MAX_BPNODE_SIZE;
9,888,582✔
1057

4,924,359✔
1058
        // See if we can fit entry into current leaf
4,924,359✔
1059
        // Works if there is room or it can join existing entries
4,924,359✔
1060
        if (leaf_insert(obj_key, key, offset, index_data, value, noextend))
9,888,582✔
1061
            return NodeChange::change_None;
9,887,922✔
1062

4,065✔
1063
        // Create new list for item (a leaf)
4,065✔
1064
        StringIndex new_list(m_target_column, alloc);
2,147,487,712✔
1065

4,065✔
1066
        new_list.leaf_insert(obj_key, key, offset, index_data, value);
2,147,487,712✔
1067

4,065✔
1068
        size_t ndx = old_keys.lower_bound_int(key);
2,147,487,712✔
1069

4,065✔
1070
        // insert before
4,065✔
1071
        if (ndx == 0)
2,147,487,712✔
1072
            return NodeChange(NodeChange::change_InsertBefore, new_list.get_ref());
×
1073

4,065✔
1074
        // insert after
4,065✔
1075
        if (ndx == old_offsets_size)
2,147,487,712✔
1076
            return NodeChange(NodeChange::change_InsertAfter, new_list.get_ref());
27✔
1077

4,050✔
1078
        // split
4,050✔
1079
        Array new_keys(alloc);
2,147,487,697✔
1080
        get_child(*new_list.m_array, 0, new_keys);
2,147,487,697✔
1081
        // Move items after split to new list
4,050✔
1082
        for (size_t i = ndx; i < old_offsets_size; ++i) {
296,268✔
1083
            int64_t v2 = old_keys.get(i);
292,218✔
1084
            int64_t v3 = m_array->get(i + 1);
292,218✔
1085

142,461✔
1086
            new_keys.add(v2);
292,218✔
1087
            new_list.m_array->add(v3);
292,218✔
1088
        }
292,218✔
1089
        old_keys.truncate(ndx);
2,147,487,697✔
1090
        m_array->truncate(ndx + 1);
2,147,487,697✔
1091

4,050✔
1092
        return NodeChange(NodeChange::change_Split, get_ref(), new_list.get_ref());
2,147,487,697✔
1093
    }
2,147,487,697✔
1094
}
10,171,047✔
1095

1096

1097
void StringIndex::node_insert_split(size_t ndx, size_t new_ref)
1098
{
471✔
1099
    REALM_ASSERT(m_array->is_inner_bptree_node());
471✔
1100
    REALM_ASSERT(new_ref);
471✔
1101

228✔
1102
    Allocator& alloc = m_array->get_alloc();
471✔
1103
    Array offsets(alloc);
471✔
1104
    get_child(*m_array, 0, offsets);
471✔
1105

228✔
1106
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
471✔
1107
    REALM_ASSERT(ndx < offsets.size());
471✔
1108
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
471✔
1109

228✔
1110
    // Get sublists
228✔
1111
    size_t refs_ndx = ndx + 1; // first entry in refs points to offsets
471✔
1112
    ref_type orig_ref = m_array->get_as_ref(refs_ndx);
471✔
1113
    StringIndex orig_col(orig_ref, m_array.get(), refs_ndx, m_target_column, alloc);
471✔
1114
    StringIndex new_col(new_ref, nullptr, 0, m_target_column, alloc);
471✔
1115

228✔
1116
    // Update original key
228✔
1117
    key_type last_key = orig_col.get_last_key();
471✔
1118
    offsets.set(ndx, last_key);
471✔
1119

228✔
1120
    // Insert new ref
228✔
1121
    key_type new_key = new_col.get_last_key();
471✔
1122
    offsets.insert(ndx + 1, new_key);
471✔
1123
    m_array->insert(ndx + 2, new_ref);
471✔
1124
}
471✔
1125

1126

1127
void StringIndex::node_insert(size_t ndx, size_t ref)
1128
{
21✔
1129
    REALM_ASSERT(ref);
21✔
1130
    REALM_ASSERT(m_array->is_inner_bptree_node());
21✔
1131

12✔
1132
    Allocator& alloc = m_array->get_alloc();
21✔
1133
    Array offsets(alloc);
21✔
1134
    get_child(*m_array, 0, offsets);
21✔
1135
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
21✔
1136

12✔
1137
    REALM_ASSERT(ndx <= offsets.size());
21✔
1138
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
21✔
1139

12✔
1140
    StringIndex col(ref, nullptr, 0, m_target_column, alloc);
21✔
1141
    key_type last_key = col.get_last_key();
21✔
1142

12✔
1143
    offsets.insert(ndx, last_key);
21✔
1144
    m_array->insert(ndx + 1, ref);
21✔
1145
}
21✔
1146

1147
bool StringIndex::leaf_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value,
1148
                              bool noextend)
1149
{
9,875,280✔
1150
    REALM_ASSERT(!m_array->is_inner_bptree_node());
9,875,280✔
1151
    if (offset >= s_max_offset && m_target_column.full_word()) {
9,875,280✔
1152
        size_t len = value.get_string().size();
6✔
1153
        size_t max = s_max_offset;
6✔
1154
        throw LogicError(ErrorCodes::LimitExceeded,
6✔
1155
                         util::format("String of length %1 exceeds maximum string length of %2.", len, max));
6✔
1156
    }
6✔
1157

4,917,570✔
1158
    // Get subnode table
4,917,570✔
1159
    Allocator& alloc = m_array->get_alloc();
9,875,274✔
1160
    Array keys(alloc);
9,875,274✔
1161
    get_child(*m_array, 0, keys);
9,875,274✔
1162
    REALM_ASSERT(m_array->size() == keys.size() + 1);
9,875,274✔
1163

4,917,570✔
1164
    // If we are keeping the complete string in the index
4,917,570✔
1165
    // we want to know if this is the last part
4,917,570✔
1166
    bool is_at_string_end = offset + 4 >= index_data.size();
9,875,274✔
1167

4,917,570✔
1168
    size_t ins_pos = keys.lower_bound_int(key);
9,875,274✔
1169
    size_t ins_pos_refs = ins_pos + 1; // first entry in refs points to offsets
9,875,274✔
1170

4,917,570✔
1171
    if (ins_pos == keys.size()) {
9,875,274✔
1172
        if (noextend)
815,973✔
1173
            return false;
27✔
1174

405,225✔
1175
        // When key is outside current range, we can just add it
405,225✔
1176
        keys.add(key);
815,946✔
1177
        if (!m_target_column.full_word() || is_at_string_end) {
815,946✔
1178
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
454,485✔
1179
            m_array->add(shifted);
454,485✔
1180
        }
454,485✔
1181
        else {
361,461✔
1182
            // create subindex for rest of string
180,753✔
1183
            StringIndex subindex(m_target_column, m_array->get_alloc());
361,461✔
1184
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
361,461✔
1185
            m_array->add(subindex.get_ref());
361,461✔
1186
        }
361,461✔
1187
        return true;
815,946✔
1188
    }
815,946✔
1189

4,512,330✔
1190
    key_type k = key_type(keys.get(ins_pos));
9,059,301✔
1191

4,512,330✔
1192
    // If key is not present we add it at the correct location
4,512,330✔
1193
    if (k != key) {
9,059,301✔
1194
        if (noextend)
869,544✔
1195
            return false;
546✔
1196

427,323✔
1197
        keys.insert(ins_pos, key);
868,998✔
1198
        if (!m_target_column.full_word() || is_at_string_end) {
868,998✔
1199
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
687,903✔
1200
            m_array->insert(ins_pos_refs, shifted);
687,903✔
1201
        }
687,903✔
1202
        else {
181,095✔
1203
            // create subindex for rest of string
90,600✔
1204
            StringIndex subindex(m_target_column, m_array->get_alloc());
181,095✔
1205
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
181,095✔
1206
            m_array->insert(ins_pos_refs, subindex.get_ref());
181,095✔
1207
        }
181,095✔
1208
        return true;
868,998✔
1209
    }
868,998✔
1210

4,084,746✔
1211
    // This leaf already has a slot for for the key
4,084,746✔
1212

4,084,746✔
1213
    uint64_t slot_value = uint64_t(m_array->get(ins_pos_refs));
8,189,757✔
1214
    size_t suboffset = offset + s_index_key_length;
8,189,757✔
1215

4,084,746✔
1216
    // Single match (lowest bit set indicates literal row_ndx)
4,084,746✔
1217
    if ((slot_value & 1) != 0) {
8,189,757✔
1218
        ObjKey obj_key2 = ObjKey(int64_t(slot_value >> 1));
49,740✔
1219
        Mixed v2 = m_target_column.full_word() ? reconstruct_string(offset, key, index_data) : get(obj_key2);
49,434✔
1220
        if (v2 == value) {
49,740✔
1221
            if (obj_key.value != obj_key2.value) {
9,693✔
1222
                // Strings are equal but this is not a list.
4,752✔
1223
                // Create a list and add both rows.
4,752✔
1224

4,752✔
1225
                // convert to list (in sorted order)
4,752✔
1226
                Array row_list(alloc);
9,663✔
1227
                row_list.create(Array::type_Normal); // Throws
9,663✔
1228
                row_list.add(obj_key < obj_key2 ? obj_key.value : obj_key2.value);
9,087✔
1229
                row_list.add(obj_key < obj_key2 ? obj_key2.value : obj_key.value);
9,087✔
1230
                m_array->set(ins_pos_refs, row_list.get_ref());
9,663✔
1231
            }
9,663✔
1232
        }
9,693✔
1233
        else {
40,047✔
1234
            StringConversionBuffer buffer;
40,047✔
1235
            auto index_data_2 = v2.get_index_data(buffer);
40,047✔
1236
            if (index_data == index_data_2 || suboffset > s_max_offset) {
40,047✔
1237
                // These strings have the same prefix up to this point but we
237✔
1238
                // don't want to recurse further, create a list in sorted order.
237✔
1239
                bool row_ndx_first = value < v2;
513✔
1240
                Array row_list(alloc);
513✔
1241
                row_list.create(Array::type_Normal); // Throws
513✔
1242
                row_list.add(row_ndx_first ? obj_key.value : obj_key2.value);
381✔
1243
                row_list.add(row_ndx_first ? obj_key2.value : obj_key.value);
381✔
1244
                m_array->set(ins_pos_refs, row_list.get_ref());
513✔
1245
            }
513✔
1246
            else {
39,534✔
1247
                // These strings have the same prefix up to this point but they
19,713✔
1248
                // are actually not equal. Extend the tree recursivly until the
19,713✔
1249
                // prefix of these strings is different.
19,713✔
1250
                StringIndex subindex(m_target_column, m_array->get_alloc());
39,534✔
1251
                subindex.insert_with_offset(obj_key2, index_data_2, v2, suboffset);
39,534✔
1252
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
39,534✔
1253
                // Join the string of SubIndices to the current position of m_array
19,713✔
1254
                m_array->set(ins_pos_refs, subindex.get_ref());
39,534✔
1255
            }
39,534✔
1256
        }
40,047✔
1257
        return true;
49,740✔
1258
    }
49,740✔
1259

4,060,029✔
1260
    // If there already is a list of matches, we see if we fit there
4,060,029✔
1261
    // or it has to be split into a subindex
4,060,029✔
1262
    ref_type ref = ref_type(slot_value);
8,140,017✔
1263
    char* header = alloc.translate(ref);
8,140,017✔
1264
    if (!Array::get_context_flag_from_header(header)) {
8,140,017✔
1265
        IntegerColumn sub(alloc, ref); // Throws
2,076,078✔
1266
        sub.set_parent(m_array.get(), ins_pos_refs);
2,076,078✔
1267

1,036,641✔
1268
        IntegerColumn::const_iterator it_end = sub.cend();
2,076,078✔
1269
        IntegerColumn::const_iterator lower = it_end;
2,076,078✔
1270

1,036,641✔
1271
        auto value_exists_in_list = [&]() {
2,076,642✔
1272
            if (m_target_column.full_word()) {
2,076,642✔
1273
                lower = sub.cbegin();
360✔
1274
                return reconstruct_string(offset, key, index_data) == value.get_string();
360✔
1275
            }
360✔
1276
            SortedListComparator slc(m_target_column);
2,076,282✔
1277
            lower = slc.find_start_of_unsorted(value, sub);
2,076,282✔
1278

1,036,908✔
1279
            if (lower != it_end) {
2,076,282✔
1280
                Mixed lower_value = get(ObjKey(*lower));
2,072,772✔
1281
                if (lower_value == value) {
2,072,772✔
1282
                    return true;
2,072,154✔
1283
                }
2,072,154✔
1284
            }
4,128✔
1285
            return false;
4,128✔
1286
        };
4,128✔
1287

1,036,641✔
1288
        // If we found the value in this list, add the duplicate to the list.
1,036,641✔
1289
        if (value_exists_in_list()) {
2,076,078✔
1290
            insert_to_existing_list_at_lower(obj_key, value, sub, lower);
2,072,439✔
1291
        }
2,072,439✔
1292
        else {
3,639✔
1293
            // If the list only stores duplicates we are free to branch and
1,920✔
1294
            // and create a sub index with this existing list as one of the
1,920✔
1295
            // leafs, but if the list doesn't only contain duplicates we
1,920✔
1296
            // must respect that we store a common key prefix up to this
1,920✔
1297
            // point and insert into the existing list.
1,920✔
1298
            ObjKey key_of_any_dup = ObjKey(sub.get(0));
3,639✔
1299
            StringConversionBuffer buffer;
3,639✔
1300
            StringData index_data_2 = m_target_column.full_word() ? reconstruct_string(offset, key, index_data)
1,923✔
1301
                                                                  : get(key_of_any_dup).get_index_data(buffer);
3,636✔
1302
            if (index_data == index_data_2 || suboffset > s_max_offset) {
4,218✔
1303
                insert_to_existing_list(obj_key, value, sub);
1,923✔
1304
            }
1,923✔
1305
            else {
1,716✔
1306
#ifdef REALM_DEBUG
1,716✔
1307
                bool contains_only_duplicates = true;
1,716✔
1308
                if (!m_target_column.full_word() && sub.size() > 1) {
2,301✔
1309
                    ObjKey first_key = ObjKey(sub.get(0));
1,335✔
1310
                    ObjKey last_key = ObjKey(sub.back());
1,335✔
1311
                    auto first = get(first_key);
1,335✔
1312
                    auto last = get(last_key);
1,335✔
1313
                    // Since the list is kept in sorted order, the first and
714✔
1314
                    // last values will be the same only if the whole list is
714✔
1315
                    // storing duplicate values.
714✔
1316
                    if (first != last) {
1,335✔
1317
                        contains_only_duplicates = false; // LCOV_EXCL_LINE
1318
                    }
×
1319
                }
1,335✔
1320
                REALM_ASSERT_DEBUG(contains_only_duplicates);
1,716✔
1321
#endif
1,716✔
1322
                // The buffer is needed for when this is an integer index.
972✔
1323
                StringIndex subindex(m_target_column, m_array->get_alloc());
1,716✔
1324
                subindex.insert_row_list(sub.get_ref(), suboffset, index_data_2);
1,716✔
1325
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
1,716✔
1326
                m_array->set(ins_pos_refs, subindex.get_ref());
1,716✔
1327
            }
1,716✔
1328
        }
3,639✔
1329
        return true;
2,076,078✔
1330
    }
2,076,078✔
1331

3,023,388✔
1332
    // The key matches, but there is a subindex here so go down a level in the tree.
3,023,388✔
1333
    StringIndex subindex(ref, m_array.get(), ins_pos_refs, m_target_column, alloc);
6,063,939✔
1334
    subindex.insert_with_offset(obj_key, index_data, value, suboffset);
6,063,939✔
1335

3,023,388✔
1336
    return true;
6,063,939✔
1337
}
6,063,939✔
1338

1339
Mixed StringIndex::get(ObjKey key) const
1340
{
3,676,548✔
1341
    return m_target_column.get_value(key);
3,676,548✔
1342
}
3,676,548✔
1343

1344
void StringIndex::erase(ObjKey key)
1345
{
888,936✔
1346
    StringConversionBuffer buffer;
888,936✔
1347
    if (m_target_column.full_word()) {
888,936✔
1348
        if (m_target_column.tokenize()) {
78✔
1349
            // This is a full text index
36✔
1350
            auto index_data(get(key).get_index_data(buffer));
72✔
1351
            auto words = Tokenizer::get_instance()->reset(std::string_view(index_data)).get_all_tokens();
72✔
1352
            for (auto& w : words) {
2,460✔
1353
                erase_string(key, w);
2,460✔
1354
            }
2,460✔
1355
        }
72✔
1356
        else {
6✔
1357
            // This is a list (of strings)
3✔
1358
            erase_list(key, m_target_column.get_list(key));
6✔
1359
        }
6✔
1360
    }
78✔
1361
    else {
888,858✔
1362
        erase_string(key, get(key).get_index_data(buffer));
888,858✔
1363
    }
888,858✔
1364
}
888,936✔
1365

1366
void StringIndex::erase_list(ObjKey key, const Lst<String>& list)
1367
{
12✔
1368
    std::vector<StringData> strings;
12✔
1369
    strings.reserve(list.size());
12✔
1370
    for (auto& val : list) {
48✔
1371
        strings.push_back(val);
48✔
1372
    }
48✔
1373

6✔
1374
    std::sort(strings.begin(), strings.end());
12✔
1375
    auto last = std::unique(strings.begin(), strings.end());
12✔
1376
    for (auto it = strings.begin(); it != last; ++it) {
48✔
1377
        erase_string(key, *it);
36✔
1378
    }
36✔
1379
}
12✔
1380

1381
namespace {
1382
template <typename T>
1383
void intersect(std::vector<ObjKey>& result, T& keys)
1384
{
294✔
1385
    if (result.empty()) {
294✔
1386
        result.reserve(keys.size());
198✔
1387
        for (auto k : keys) {
498✔
1388
            result.emplace_back(k);
498✔
1389
        }
498✔
1390
    }
198✔
1391
    else {
96✔
1392
        auto it = result.begin();
96✔
1393
        auto keep = it;
96✔
1394
        auto m = keys.begin();
96✔
1395

48✔
1396
        // only keep intersection
48✔
1397
        while (it != result.end() && m != keys.end()) {
354✔
1398
            int64_t int_val = *m;
258✔
1399
            if (it->value < int_val) {
258✔
1400
                it++; // don't keep if match is not in new set
18✔
1401
            }
18✔
1402
            else if (it->value > int_val) {
240✔
1403
                ++m; // ignore new matches
102✔
1404
            }
102✔
1405
            else {
138✔
1406
                // Found both places - make sure it is kept
69✔
1407
                if (keep < it)
138✔
1408
                    *keep = *it;
6✔
1409
                ++keep;
138✔
1410
                ++it;
138✔
1411
                ++m;
138✔
1412
            }
138✔
1413
        }
258✔
1414
        if (keep != result.end()) {
96✔
1415
            result.erase(keep, result.end());
24✔
1416
        }
24✔
1417
    }
96✔
1418
}
294✔
1419

1420
struct FindResWrapper {
1421
    InternalFindResult& res;
1422
    IntegerColumn& indexes;
1423
    size_t n = 0;
1424
    size_t size()
1425
    {
144✔
1426
        return res.end_ndx - res.start_ndx;
144✔
1427
    }
144✔
1428
    auto begin()
1429
    {
228✔
1430
        return indexes.cbegin();
228✔
1431
    }
228✔
1432
    auto end()
1433
    {
384✔
1434
        return indexes.cend();
384✔
1435
    }
384✔
1436
};
1437
} // namespace
1438

1439
void StringIndex::insert_bulk(const ArrayUnsigned* keys, uint64_t key_offset, size_t num_values, ArrayPayload& values)
1440
{
101,991✔
1441
    if (keys) {
101,991✔
1442
        for (size_t i = 0; i < num_values; ++i) {
1,185✔
1443
            ObjKey key(keys->get(i) + key_offset);
978✔
1444
            insert(key, values.get_any(i));
978✔
1445
        }
978✔
1446
    }
207✔
1447
    else {
101,784✔
1448
        for (size_t i = 0; i < num_values; ++i) {
1,425,621✔
1449
            ObjKey key(i + key_offset);
1,323,837✔
1450
            insert(key, values.get_any(i));
1,323,837✔
1451
        }
1,323,837✔
1452
    }
101,784✔
1453
}
101,991✔
1454

1455
void StringIndex::insert_bulk_list(const ArrayUnsigned* keys, uint64_t key_offset, size_t num_values,
1456
                                   ArrayInteger& ref_array)
1457
{
258✔
1458
    auto get_obj_key = [&](size_t n) {
60,000✔
1459
        if (keys) {
60,000✔
1460
            return ObjKey(keys->get(n) + key_offset);
×
1461
        }
×
1462
        return ObjKey(n + key_offset);
60,000✔
1463
    };
60,000✔
1464
    for (size_t i = 0; i < num_values; ++i) {
60,258✔
1465
        ObjKey key = get_obj_key(i);
60,000✔
1466
        if (auto ref = to_ref(ref_array.get(i))) {
60,000✔
1467
            BPlusTree<String> values(ref_array.get_alloc());
60,000✔
1468
            values.init_from_ref(ref);
60,000✔
1469
            values.for_all([&](const StringData& str) {
180,000✔
1470
                insert(key, str);
180,000✔
1471
            });
180,000✔
1472
        }
60,000✔
1473
    }
60,000✔
1474
}
258✔
1475

1476

1477
void StringIndex::find_all_fulltext(std::vector<ObjKey>& result, StringData value) const
1478
{
378✔
1479
    InternalFindResult res;
378✔
1480
    REALM_ASSERT(result.empty());
378✔
1481

189✔
1482
    auto tokenizer = Tokenizer::get_instance();
378✔
1483
    tokenizer->reset({value.data(), value.size()});
378✔
1484
    auto [includes, excludes] = tokenizer->get_search_tokens();
378✔
1485
    if (includes.empty()) {
378✔
1486
        if (excludes.empty()) {
24✔
1487
            throw InvalidArgument("Missing search token");
6✔
1488
        }
6✔
1489
        result = m_target_column.get_all_keys();
18✔
1490
    }
18✔
1491
    else {
354✔
1492
        for (auto& token : includes) {
432✔
1493
            if (token.back() == '*') {
432✔
1494
                std::set<int64_t> keys;
66✔
1495
                m_array->index_string_find_all_prefix(keys, StringData(token.data(), token.size() - 1));
66✔
1496
                intersect(result, keys);
66✔
1497
            }
66✔
1498
            else {
366✔
1499
                switch (find_all_no_copy(StringData{token}, res)) {
366✔
1500
                    case FindRes_not_found:
12✔
1501
                        result.clear();
12✔
1502
                        break;
12✔
1503
                    case FindRes_column: {
228✔
1504
                        IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
228✔
1505
                        FindResWrapper wrapper{res, indexes};
228✔
1506
                        intersect(result, wrapper);
228✔
1507
                        break;
228✔
1508
                    }
×
1509
                    case FindRes_single:
126✔
1510
                        // merge in single res
63✔
1511
                        if (result.empty()) {
126✔
1512
                            result.emplace_back(res.payload);
96✔
1513
                        }
96✔
1514
                        else {
30✔
1515
                            ObjKey key(res.payload);
30✔
1516
                            auto pos = std::lower_bound(result.begin(), result.end(), key);
30✔
1517
                            if (pos != result.end() && key == *pos) {
30✔
1518
                                result.clear();
30✔
1519
                                result.push_back(key);
30✔
1520
                            }
30✔
1521
                            else {
×
1522
                                result.clear();
×
1523
                            }
×
1524
                        }
30✔
1525
                        break;
126✔
1526
                }
432✔
1527
            }
432✔
1528
            if (result.empty())
432✔
1529
                return;
36✔
1530
        }
432✔
1531
    }
354✔
1532

189✔
1533
    for (auto& token : excludes) {
357✔
1534
        if (token.back() == '*') {
96✔
1535
            throw IllegalOperation("Exclude by prefix is not implemented");
×
1536
        }
×
1537
        if (result.empty())
96✔
1538
            return;
×
1539

48✔
1540
        switch (find_all_no_copy(StringData{token}, res)) {
96✔
1541
            case FindRes_not_found:
✔
1542
                // Nothing to exclude
1543
                break;
×
1544
            case FindRes_column: {
60✔
1545
                IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
60✔
1546

30✔
1547
                auto it = result.begin();
60✔
1548
                auto keep = it;
60✔
1549
                size_t m = res.start_ndx;
60✔
1550
                auto idx_val = indexes.get(m);
60✔
1551

30✔
1552
                while (it != result.end()) {
390✔
1553
                    if (it->value < idx_val) {
330✔
1554
                        // Not found in excludes
78✔
1555
                        if (keep < it)
156✔
1556
                            *keep = *it;
156✔
1557
                        ++keep;
156✔
1558
                        ++it;
156✔
1559
                    }
156✔
1560
                    else {
174✔
1561
                        if (it->value == idx_val) {
174✔
1562
                            // found in excludes - don't keep
75✔
1563
                            ++it;
150✔
1564
                        }
150✔
1565
                        ++m;
174✔
1566
                        idx_val = m < res.end_ndx ? indexes.get(m) : std::numeric_limits<int64_t>::max();
153✔
1567
                    }
174✔
1568
                }
330✔
1569
                if (keep != result.end()) {
60✔
1570
                    result.erase(keep, result.end());
60✔
1571
                }
60✔
1572
                break;
60✔
1573
            }
×
1574
            case FindRes_single: {
36✔
1575
                // exclude single res
18✔
1576
                ObjKey key(res.payload);
36✔
1577
                auto pos = std::lower_bound(result.begin(), result.end(), key);
36✔
1578
                if (pos != result.end() && key == *pos) {
36✔
1579
                    result.erase(pos);
36✔
1580
                }
36✔
1581
                break;
36✔
1582
            }
×
1583
        }
96✔
1584
    }
96✔
1585
}
336✔
1586

1587

1588
void StringIndex::clear()
1589
{
3,096✔
1590
    Array values(m_array->get_alloc());
3,096✔
1591
    get_child(*m_array, 0, values);
3,096✔
1592
    REALM_ASSERT(m_array->size() == values.size() + 1);
3,096✔
1593

894✔
1594
    values.clear();
3,096✔
1595
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
3,096✔
1596

894✔
1597
    size_t size = 1;
3,096✔
1598
    m_array->truncate_and_destroy_children(size); // Don't touch `values` array
3,096✔
1599

894✔
1600
    m_array->set_type(Array::type_HasRefs);
3,096✔
1601
}
3,096✔
1602

1603

1604
void StringIndex::do_delete(ObjKey obj_key, StringData index_data, size_t offset)
1605
{
1,198,773✔
1606
    Allocator& alloc = m_array->get_alloc();
1,198,773✔
1607
    Array values(alloc);
1,198,773✔
1608
    get_child(*m_array, 0, values);
1,198,773✔
1609
    REALM_ASSERT(m_array->size() == values.size() + 1);
1,198,773✔
1610

598,836✔
1611
    // Create 4 byte index key
598,836✔
1612
    key_type key = create_key(index_data, offset);
1,198,773✔
1613

598,836✔
1614
    const size_t pos = values.lower_bound_int(key);
1,198,773✔
1615
    const size_t pos_refs = pos + 1; // first entry in refs points to offsets
1,198,773✔
1616
    REALM_ASSERT(pos != values.size());
1,198,773✔
1617

598,836✔
1618
    if (m_array->is_inner_bptree_node()) {
1,198,773✔
1619
        ref_type ref = m_array->get_as_ref(pos_refs);
64,734✔
1620
        StringIndex node(ref, m_array.get(), pos_refs, m_target_column, alloc);
64,734✔
1621
        node.do_delete(obj_key, index_data, offset);
64,734✔
1622

32,379✔
1623
        // Update the ref
32,379✔
1624
        if (node.is_empty()) {
64,734✔
1625
            values.erase(pos);
108✔
1626
            m_array->erase(pos_refs);
108✔
1627
            node.destroy();
108✔
1628
        }
108✔
1629
        else {
64,626✔
1630
            key_type max_val = node.get_last_key();
64,626✔
1631
            if (max_val != key_type(values.get(pos)))
64,626✔
1632
                values.set(pos, max_val);
108✔
1633
        }
64,626✔
1634
    }
64,734✔
1635
    else {
1,134,039✔
1636
        uint64_t ref = m_array->get(pos_refs);
1,134,039✔
1637
        if (ref & 1) {
1,134,039✔
1638
            REALM_ASSERT(int64_t(ref >> 1) == obj_key.value);
537,249✔
1639
            values.erase(pos);
537,249✔
1640
            m_array->erase(pos_refs);
537,249✔
1641
        }
537,249✔
1642
        else {
596,790✔
1643
            // A real ref either points to a list or a subindex
297,006✔
1644
            char* header = alloc.translate(ref_type(ref));
596,790✔
1645
            if (Array::get_context_flag_from_header(header)) {
596,790✔
1646
                StringIndex subindex(ref_type(ref), m_array.get(), pos_refs, m_target_column, alloc);
242,724✔
1647
                subindex.do_delete(obj_key, index_data, offset + s_index_key_length);
242,724✔
1648

121,629✔
1649
                if (subindex.is_empty()) {
242,724✔
1650
                    values.erase(pos);
12,186✔
1651
                    m_array->erase(pos_refs);
12,186✔
1652
                    subindex.destroy();
12,186✔
1653
                }
12,186✔
1654
            }
242,724✔
1655
            else {
354,066✔
1656
                IntegerColumn sub(alloc, ref_type(ref)); // Throws
354,066✔
1657
                sub.set_parent(m_array.get(), pos_refs);
354,066✔
1658
                size_t r = sub.find_first(obj_key.value);
354,066✔
1659
                size_t sub_size = sub.size(); // Slow
354,066✔
1660
                REALM_ASSERT_EX(r != sub_size, r, sub_size);
354,066✔
1661
                sub.erase(r);
354,066✔
1662

175,377✔
1663
                if (sub_size == 1) {
354,066✔
1664
                    values.erase(pos);
3,027✔
1665
                    m_array->erase(pos_refs);
3,027✔
1666
                    sub.destroy();
3,027✔
1667
                }
3,027✔
1668
            }
354,066✔
1669
        }
596,790✔
1670
    }
1,134,039✔
1671
}
1,198,773✔
1672

1673
void StringIndex::erase_string(ObjKey key, StringData value)
1674
{
891,495✔
1675
    do_delete(key, value, 0);
891,495✔
1676

444,978✔
1677
    // Collapse top nodes with single item
444,978✔
1678
    while (m_array->is_inner_bptree_node()) {
891,501✔
1679
        REALM_ASSERT(m_array->size() > 1); // node cannot be empty
64,734✔
1680
        if (m_array->size() > 2)
64,734✔
1681
            break;
64,728✔
1682

3✔
1683
        ref_type ref = m_array->get_as_ref(1);
6✔
1684
        m_array->set(1, 1); // avoid destruction of the extracted ref
6✔
1685
        m_array->destroy_deep();
6✔
1686
        m_array->init_from_ref(ref);
6✔
1687
        m_array->update_parent();
6✔
1688
    }
6✔
1689
}
891,495✔
1690

1691
namespace {
1692

1693
bool has_duplicate_values(const Array& node, const ClusterColumn& target_col) noexcept
1694
{
6,348✔
1695
    Allocator& alloc = node.get_alloc();
6,348✔
1696
    Array child(alloc);
6,348✔
1697
    size_t n = node.size();
6,348✔
1698
    REALM_ASSERT(n >= 1);
6,348✔
1699
    if (node.is_inner_bptree_node()) {
6,348✔
1700
        // Inner node
1701
        for (size_t i = 1; i < n; ++i) {
×
1702
            ref_type ref = node.get_as_ref(i);
×
1703
            child.init_from_ref(ref);
×
1704
            if (has_duplicate_values(child, target_col))
×
1705
                return true;
×
1706
        }
×
1707
        return false;
×
1708
    }
6,348✔
1709

3,174✔
1710
    // Leaf node
3,174✔
1711
    for (size_t i = 1; i < n; ++i) {
32,832✔
1712
        int_fast64_t value = node.get(i);
28,740✔
1713
        bool is_single_row_index = (value & 1) != 0;
28,740✔
1714
        if (is_single_row_index)
28,740✔
1715
            continue;
23,280✔
1716

2,730✔
1717
        ref_type ref = to_ref(value);
5,460✔
1718
        child.init_from_ref(ref);
5,460✔
1719

2,730✔
1720
        bool is_subindex = child.get_context_flag();
5,460✔
1721
        if (is_subindex) {
5,460✔
1722
            if (has_duplicate_values(child, target_col))
4,872✔
1723
                return true;
1,800✔
1724
            continue;
3,072✔
1725
        }
3,072✔
1726

294✔
1727
        // Child is root of B+-tree of row indexes
294✔
1728
        IntegerColumn sub(alloc, ref);
588✔
1729
        if (sub.size() > 1) {
588✔
1730
            ObjKey first_key = ObjKey(sub.get(0));
480✔
1731
            ObjKey last_key = ObjKey(sub.back());
480✔
1732
            Mixed first = target_col.get_value(first_key);
480✔
1733
            Mixed last = target_col.get_value(last_key);
480✔
1734
            // Since the list is kept in sorted order, the first and
240✔
1735
            // last values will be the same only if the whole list is
240✔
1736
            // storing duplicate values.
240✔
1737
            if (first == last) {
480✔
1738
                return true;
432✔
1739
            }
432✔
1740
            // There may also be several short lists combined, so we need to
24✔
1741
            // check each of these individually for duplicates.
24✔
1742
            IntegerColumn::const_iterator it = sub.cbegin();
48✔
1743
            IntegerColumn::const_iterator it_end = sub.cend();
48✔
1744
            SortedListComparator slc(target_col);
48✔
1745
            while (it != it_end) {
144✔
1746
                Mixed it_data = target_col.get_value(ObjKey(*it));
120✔
1747
                IntegerColumn::const_iterator next = slc.find_end_of_unsorted(it_data, sub, it);
120✔
1748
                size_t count_of_value = next - it; // row index subtraction in `sub`
120✔
1749
                if (count_of_value > 1) {
120✔
1750
                    return true;
24✔
1751
                }
24✔
1752
                it = next;
96✔
1753
            }
96✔
1754
        }
48✔
1755
    }
588✔
1756

3,174✔
1757
    return false;
5,220✔
1758
}
6,348✔
1759

1760
} // anonymous namespace
1761

1762

1763
bool StringIndex::has_duplicate_values() const noexcept
1764
{
1,476✔
1765
    return ::has_duplicate_values(*m_array, m_target_column);
1,476✔
1766
}
1,476✔
1767

1768

1769
bool StringIndex::is_empty() const
1770
{
307,578✔
1771
    return m_array->size() == 1; // first entry in refs points to offsets
307,578✔
1772
}
307,578✔
1773

1774
void StringIndex::insert(ObjKey key, const Mixed& value)
1775
{
2,548,707✔
1776
    StringConversionBuffer buffer;
2,548,707✔
1777
    constexpr size_t offset = 0; // First key from beginning of string
2,548,707✔
1778

1,262,994✔
1779
    if (this->m_target_column.tokenize()) {
2,548,707✔
1780
        if (value.is_type(type_String)) {
198✔
1781
            auto words = Tokenizer::get_instance()->reset(std::string_view(value.get<StringData>())).get_all_tokens();
198✔
1782

99✔
1783
            for (auto& word : words) {
936✔
1784
                Mixed m(word);
936✔
1785
                insert_with_offset(key, m.get_index_data(buffer), m, 0); // Throws
936✔
1786
            }
936✔
1787
        }
198✔
1788
    }
198✔
1789
    else {
2,548,509✔
1790
        insert_with_offset(key, value.get_index_data(buffer), value, offset); // Throws
2,548,509✔
1791
    }
2,548,509✔
1792
}
2,548,707✔
1793

1794
void StringIndex::set(ObjKey key, const Mixed& new_value)
1795
{
660,918✔
1796
    StringConversionBuffer buffer;
660,918✔
1797
    Mixed old_value = get(key);
660,918✔
1798

330,156✔
1799
    if (this->m_target_column.tokenize()) {
660,918✔
1800
        auto tokenizer = Tokenizer::get_instance();
168✔
1801
        StringData old_string = old_value.get_index_data(buffer);
168✔
1802
        std::set<std::string> old_words;
168✔
1803

84✔
1804
        if (old_string.size() > 0) {
168✔
1805
            tokenizer->reset({old_string.data(), old_string.size()});
6✔
1806
            old_words = tokenizer->get_all_tokens();
6✔
1807
        }
6✔
1808
        std::set<std::string> new_words;
168✔
1809
        if (new_value.is_type(type_String)) {
168✔
1810
            new_words = tokenizer->reset(std::string_view(new_value.get<StringData>())).get_all_tokens();
168✔
1811
        }
168✔
1812

84✔
1813
        auto w1 = old_words.begin();
168✔
1814
        auto w2 = new_words.begin();
168✔
1815

84✔
1816
        // Do a diff, deleting words no longer present and
84✔
1817
        // inserting new words
84✔
1818
        while (w1 != old_words.end() && w2 != new_words.end()) {
540✔
1819
            if (*w1 < *w2) {
372✔
1820
                erase_string(key, *w1);
126✔
1821
                ++w1;
126✔
1822
            }
126✔
1823
            else if (*w2 < *w1) {
246✔
1824
                Mixed m(*w2);
198✔
1825
                insert_with_offset(key, m.get_index_data(buffer), m, 0);
198✔
1826
                ++w2;
198✔
1827
            }
198✔
1828
            else {
48✔
1829
                ++w1;
48✔
1830
                ++w2;
48✔
1831
            }
48✔
1832
        }
372✔
1833
        while (w1 != old_words.end()) {
168✔
1834
            erase_string(key, *w1);
×
1835
            ++w1;
×
1836
        }
×
1837
        while (w2 != new_words.end()) {
3,354✔
1838
            Mixed m(*w2);
3,186✔
1839
            insert_with_offset(key, m.get_index_data(buffer), m, 0);
3,186✔
1840

1,593✔
1841
            ++w2;
3,186✔
1842
        }
3,186✔
1843
    }
168✔
1844
    else {
660,750✔
1845
        if (REALM_LIKELY(new_value != old_value)) {
660,750✔
1846
            // We must erase this row first because erase uses find_first which
316,647✔
1847
            // might find the duplicate if we insert before erasing.
316,647✔
1848
            erase(key); // Throws
633,855✔
1849

316,647✔
1850
            auto index_data = new_value.get_index_data(buffer);
633,855✔
1851
            insert_with_offset(key, index_data, new_value, 0); // Throws
633,855✔
1852
        }
633,855✔
1853
    }
660,750✔
1854
}
660,918✔
1855

1856
void StringIndex::node_add_key(ref_type ref)
1857
{
162✔
1858
    REALM_ASSERT(ref);
162✔
1859
    REALM_ASSERT(m_array->is_inner_bptree_node());
162✔
1860

72✔
1861
    Allocator& alloc = m_array->get_alloc();
162✔
1862
    Array offsets(alloc);
162✔
1863
    get_child(*m_array, 0, offsets);
162✔
1864
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
162✔
1865
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE + 1);
162✔
1866

72✔
1867
    Array new_top(alloc);
162✔
1868
    Array new_offsets(alloc);
162✔
1869
    new_top.init_from_ref(ref);
162✔
1870
    new_offsets.init_from_ref(new_top.get_as_ref(0));
162✔
1871
    REALM_ASSERT(!new_offsets.is_empty());
162✔
1872

72✔
1873
    int64_t key = new_offsets.back();
162✔
1874
    offsets.add(key);
162✔
1875
    m_array->add(ref);
162✔
1876
}
162✔
1877

1878
// Must return true if value of object(key) is less than 'b'.
1879
bool SortedListComparator::operator()(int64_t key_value, const Mixed& b) // used in lower_bound
1880
{
×
1881
    Mixed a = m_column.get_value(ObjKey(key_value));
×
1882
    if (a.is_null() && !b.is_null())
×
1883
        return true;
×
1884
    else if (b.is_null() && !a.is_null())
×
1885
        return false;
×
1886
    else if (a.is_null() && b.is_null())
×
1887
        return false;
×
1888
    return a.compare(b) < 0;
×
1889
}
×
1890

1891

1892
// Must return true if value of 'a' is less than value of object(key).
1893
bool SortedListComparator::operator()(const Mixed& a, int64_t key_value) // used in upper_bound
1894
{
×
1895
    Mixed b = m_column.get_value(ObjKey(key_value));
×
1896
    if (a.is_null() && !b.is_null())
×
1897
        return true;
×
1898
    else if (b.is_null() && !a.is_null())
×
1899
        return false;
×
1900
    else if (a.is_null() && b.is_null())
×
1901
        return false;
×
1902
    return a.compare(b) < 0;
×
1903
}
×
1904

1905
// TODO: the next time the StringIndex is migrated (post version 23) and sorted
1906
// properly in these lists replace this method with
1907
// std::lower_bound(key_values.cbegin(), it_end, value, slc);
1908
IntegerColumn::const_iterator SortedListComparator::find_start_of_unsorted(const Mixed& value,
1909
                                                                           const IntegerColumn& key_values) const
1910
{
2,133,468✔
1911
    if (key_values.size() >= 2) {
2,133,468✔
1912
        Mixed first = m_column.get_value(ObjKey(key_values.get(0)));
2,114,784✔
1913
        Mixed last = m_column.get_value(ObjKey(key_values.get(key_values.size() - 1)));
2,114,784✔
1914
        if (first == last) {
2,114,784✔
1915
            if (value.compare(first) <= 0) {
2,108,367✔
1916
                return key_values.cbegin();
2,107,221✔
1917
            }
2,107,221✔
1918
            else {
1,146✔
1919
                return key_values.cend();
1,146✔
1920
            }
1,146✔
1921
        }
25,101✔
1922
    }
2,114,784✔
1923

10,263✔
1924
    IntegerColumn::const_iterator it = key_values.cbegin();
25,101✔
1925
    IntegerColumn::const_iterator end = key_values.cend();
25,101✔
1926
    IntegerColumn::const_iterator first_greater = end;
25,101✔
1927
    while (it != end) {
206,622✔
1928
        Mixed val_it = m_column.get_value(ObjKey(*it));
202,050✔
1929
        int cmp = val_it.compare(value);
202,050✔
1930
        if (cmp == 0) {
202,050✔
1931
            return it;
20,529✔
1932
        }
20,529✔
1933
        if (cmp > 0 && first_greater == end) {
181,521✔
1934
            first_greater = it;
879✔
1935
        }
879✔
1936
        ++it;
181,521✔
1937
    }
181,521✔
1938
    return first_greater;
12,363✔
1939
}
25,101✔
1940

1941
// TODO: same as above, change to std::upper_bound(lower, it_end, value, slc);
1942
IntegerColumn::const_iterator SortedListComparator::find_end_of_unsorted(const Mixed& value,
1943
                                                                         const IntegerColumn& key_values,
1944
                                                                         IntegerColumn::const_iterator begin) const
1945
{
2,120,229✔
1946
    IntegerColumn::const_iterator end = key_values.cend();
2,120,229✔
1947
    if (begin != end && end - begin > 0) {
2,120,229✔
1948
        // optimization: check the last element first
1,058,826✔
1949
        Mixed last = m_column.get_value(ObjKey(*(key_values.cend() - 1)));
2,120,052✔
1950
        if (last == value) {
2,120,052✔
1951
            return key_values.cend();
2,119,020✔
1952
        }
2,119,020✔
1953
    }
1,209✔
1954
    while (begin != end) {
110,895✔
1955
        Mixed val_it = m_column.get_value(ObjKey(*begin));
110,895✔
1956
        if (value.compare(val_it) != 0) {
110,895✔
1957
            return begin;
1,281✔
1958
        }
1,281✔
1959
        ++begin;
109,614✔
1960
    }
109,614✔
1961
    return end;
4,294,967,294✔
1962
}
1,209✔
1963

1964
// LCOV_EXCL_START ignore debug functions
1965
void StringIndex::verify() const
1966
{
4,248✔
1967
#ifdef REALM_DEBUG
4,248✔
1968
    m_array->verify();
4,248✔
1969

2,124✔
1970
    Allocator& alloc = m_array->get_alloc();
4,248✔
1971
    const size_t array_size = m_array->size();
4,248✔
1972

2,124✔
1973
    // Get first matching row for every key
2,124✔
1974
    if (m_array->is_inner_bptree_node()) {
4,248✔
1975
        for (size_t i = 1; i < array_size; ++i) {
×
1976
            size_t ref = m_array->get_as_ref(i);
×
1977
            StringIndex ndx(ref, nullptr, 0, m_target_column, alloc);
×
1978
            ndx.verify();
×
1979
        }
×
1980
    }
×
1981
    else {
4,248✔
1982
        size_t column_size = m_target_column.size();
4,248✔
1983
        for (size_t i = 1; i < array_size; ++i) {
8,904✔
1984
            int64_t ref = m_array->get(i);
4,656✔
1985

2,328✔
1986
            // low bit set indicate literal ref (shifted)
2,328✔
1987
            if (ref & 1) {
4,656✔
1988
                size_t r = to_size_t((uint64_t(ref) >> 1));
480✔
1989
                REALM_ASSERT_EX(r < column_size, r, column_size);
480✔
1990
            }
480✔
1991
            else {
4,176✔
1992
                // A real ref either points to a list or a subindex
2,088✔
1993
                char* header = alloc.translate(to_ref(ref));
4,176✔
1994
                if (Array::get_context_flag_from_header(header)) {
4,176✔
1995
                    StringIndex ndx(to_ref(ref), m_array.get(), i, m_target_column, alloc);
4,128✔
1996
                    ndx.verify();
4,128✔
1997
                }
4,128✔
1998
                else {
48✔
1999
                    IntegerColumn sub(alloc, to_ref(ref)); // Throws
48✔
2000
                    IntegerColumn::const_iterator it = sub.cbegin();
48✔
2001
                    IntegerColumn::const_iterator it_end = sub.cend();
48✔
2002
                    SortedListComparator slc(m_target_column);
48✔
2003
                    Mixed previous = get(ObjKey(*it));
48✔
2004
                    size_t last_row = to_size_t(*it);
48✔
2005

24✔
2006
                    // Check that strings listed in sub are in sorted order
24✔
2007
                    // and if there are duplicates, that the row numbers are
24✔
2008
                    // sorted in the group of duplicates.
24✔
2009
                    while (it != it_end) {
168✔
2010
                        Mixed it_data = get(ObjKey(*it));
120✔
2011
                        size_t it_row = to_size_t(*it);
120✔
2012
                        REALM_ASSERT(previous <= it_data);
120✔
2013
                        if (it != sub.cbegin() && previous == it_data) {
120✔
2014
                            REALM_ASSERT_EX(it_row > last_row, it_row, last_row);
×
2015
                        }
×
2016
                        last_row = it_row;
120✔
2017
                        previous = get(ObjKey(*it));
120✔
2018
                        ++it;
120✔
2019
                    }
120✔
2020
                }
48✔
2021
            }
4,176✔
2022
        }
4,656✔
2023
    }
4,248✔
2024
// FIXME: Extend verification along the lines of IntegerColumn::verify().
2,124✔
2025
#endif
4,248✔
2026
}
4,248✔
2027

2028
#ifdef REALM_DEBUG
2029

2030
template <class T>
2031
void StringIndex::verify_entries(const ClusterColumn& column) const
2032
{
2033
    std::vector<ObjKey> results;
2034

2035
    auto it = column.begin();
2036
    auto end = column.end();
2037
    auto col = column.get_column_key();
2038
    while (it != end) {
2039
        ObjKey key = it->get_key();
2040
        T value = it->get<T>(col);
2041

2042
        find_all(results, value);
2043

2044
        auto ndx = find(results.begin(), results.end(), key);
2045
        REALM_ASSERT(ndx != results.end());
2046
        size_t found = count(value);
2047
        REALM_ASSERT_EX(found >= 1, found);
2048
        results.clear();
2049
    }
2050
}
2051

2052
namespace {
2053

2054
bool is_chars(uint64_t val)
2055
{
×
2056
    if (val == 0)
×
2057
        return true;
×
2058
    if (is_chars(val >> 8)) {
×
2059
        char c = val & 0xFF;
×
2060
        if (!c || std::isprint(c)) {
×
2061
            return true;
×
2062
        }
×
2063
    }
×
2064
    return false;
×
2065
}
×
2066

2067
void out_char(std::ostream& out, uint64_t val)
2068
{
×
2069
    if (val) {
×
2070
        out_char(out, val >> 8);
×
2071
        char c = val & 0xFF;
×
2072
        if (c && c != 'X') {
×
2073
            out << c;
×
2074
        }
×
2075
    }
×
2076
}
×
2077

2078
void out_hex(std::ostream& out, uint64_t val)
2079
{
×
2080
    if (is_chars(val)) {
×
2081
        out_char(out, val);
×
2082
    }
×
2083
    else {
×
2084
        out << int(val);
×
2085
    }
×
2086
}
×
2087

2088
} // namespace
2089

2090
void StringIndex::dump_node_structure(const Array& node, std::ostream& out, int level)
2091
{
×
2092
    int indent = level * 2;
×
2093
    Allocator& alloc = node.get_alloc();
×
2094
    Array subnode(alloc);
×
2095

2096
    size_t node_size = node.size();
×
2097
    REALM_ASSERT(node_size >= 1);
×
2098

2099
    out << std::hex;
×
2100

2101
    bool node_is_leaf = !node.is_inner_bptree_node();
×
2102
    if (node_is_leaf) {
×
2103
        out << std::setw(indent) << ""
×
2104
            << "Leaf (B+ tree) (ref: " << node.get_ref() << ")\n";
×
2105
    }
×
2106
    else {
×
2107
        out << std::setw(indent) << ""
×
2108
            << "Inner node (B+ tree) (ref: " << node.get_ref() << ")\n";
×
2109
    }
×
2110

2111
    subnode.init_from_ref(to_ref(node.front()));
×
2112
    out << std::setw(indent) << ""
×
2113
        << "  Keys (keys_ref: " << subnode.get_ref() << ", ";
×
2114
    if (subnode.is_empty()) {
×
2115
        out << "no keys";
×
2116
    }
×
2117
    else {
×
2118
        out << "keys: ";
×
2119
        for (size_t i = 0; i != subnode.size(); ++i) {
×
2120
            if (i != 0)
×
2121
                out << ", ";
×
2122
            out_hex(out, uint32_t(subnode.get(i)));
×
2123
        }
×
2124
    }
×
2125
    out << ")\n";
×
2126

2127
    if (node_is_leaf) {
×
2128
        for (size_t i = 1; i != node_size; ++i) {
×
2129
            int_fast64_t value = node.get(i);
×
2130
            bool is_single_row_index = (value & 1) != 0;
×
2131
            if (is_single_row_index) {
×
2132
                out << std::setw(indent) << ""
×
2133
                    << "  Single row index (value: " << (value / 2) << ")\n";
×
2134
                continue;
×
2135
            }
×
2136
            subnode.init_from_ref(to_ref(value));
×
2137
            bool is_subindex = subnode.get_context_flag();
×
2138
            if (is_subindex) {
×
2139
                out << std::setw(indent) << ""
×
2140
                    << "  Subindex\n";
×
2141
                dump_node_structure(subnode, out, level + 2);
×
2142
                continue;
×
2143
            }
×
2144
            IntegerColumn indexes(alloc, to_ref(value));
×
2145
            out << std::setw(indent) << ""
×
2146
                << "  List of row indexes\n";
×
2147
            indexes.dump_values(out, level + 2);
×
2148
        }
×
2149
        return;
×
2150
    }
×
2151

2152

2153
    size_t num_children = node_size - 1;
×
2154
    size_t child_ref_begin = 1;
×
2155
    size_t child_ref_end = 1 + num_children;
×
2156
    for (size_t i = child_ref_begin; i != child_ref_end; ++i) {
×
2157
        subnode.init_from_ref(node.get_as_ref(i));
×
2158
        dump_node_structure(subnode, out, level + 1);
×
2159
    }
×
2160
}
×
2161

2162
void StringIndex::print() const
2163
{
×
2164
    dump_node_structure(*m_array, std::cout, 0);
×
2165
}
×
2166

2167
#endif // LCOV_EXCL_STOP ignore debug functions
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc