• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / 2040

14 Feb 2024 09:17PM UTC coverage: 91.863% (-0.02%) from 91.881%
2040

push

Evergreen

web-flow
Merge pull request #7341 from realm/tg/file-copy-race

File a minor race condition in the backup realm file action

93078 of 171514 branches covered (54.27%)

50 of 52 new or added lines in 6 files covered. (96.15%)

121 existing lines in 10 files now uncovered.

235427 of 256280 relevant lines covered (91.86%)

6415204.04 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.85
/src/realm/index_string.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include <cstdio>
20
#include <iomanip>
21
#include <list>
22

23
#ifdef REALM_DEBUG
24
#include <iostream>
25
#endif
26

27
#include <realm/exceptions.hpp>
28
#include <realm/index_string.hpp>
29
#include <realm/table.hpp>
30
#include <realm/timestamp.hpp>
31
#include <realm/column_integer.hpp>
32
#include <realm/unicode.hpp>
33
#include <realm/tokenizer.hpp>
34

35
using namespace realm;
36
using namespace realm::util;
37

38
namespace {
39

40
void get_child(Array& parent, size_t child_ref_ndx, Array& child) noexcept
41
{
19,379,289✔
42
    ref_type child_ref = parent.get_as_ref(child_ref_ndx);
19,379,289✔
43
    child.init_from_ref(child_ref);
19,379,289✔
44
    child.set_parent(&parent, child_ref_ndx);
19,379,289✔
45
}
19,379,289✔
46

47
// This method reconstructs the string inserted in the search index based on a string
48
// that matches so far and the last key (only works if complete strings are stored in the index)
49
static StringData reconstruct_string(size_t offset, StringIndex::key_type key, StringData new_string)
50
{
1,086✔
51
    if (key == 0)
1,086✔
52
        return StringData();
×
53

543✔
54
    size_t rest_len = 4;
1,086✔
55
    char* k = reinterpret_cast<char*>(&key);
1,086✔
56
    if (k[0] == 'X')
1,086✔
57
        rest_len = 3;
234✔
58
    else if (k[1] == 'X')
852✔
59
        rest_len = 2;
360✔
60
    else if (k[2] == 'X')
492✔
61
        rest_len = 1;
228✔
62
    else if (k[3] == 'X')
264✔
63
        rest_len = 0;
36✔
64

543✔
65
    REALM_ASSERT(offset + rest_len <= new_string.size());
1,086✔
66

543✔
67
    return StringData(new_string.data(), offset + rest_len);
1,086✔
68
}
1,086✔
69

70

71
} // anonymous namespace
72

73
DataType ClusterColumn::get_data_type() const
74
{
×
75
    const Table* table = m_cluster_tree->get_owning_table();
×
76
    return table->get_column_type(m_column_key);
×
77
}
×
78

79
Mixed ClusterColumn::get_value(ObjKey key) const
80
{
43,004,490✔
81
    const Obj obj{m_cluster_tree->get(key)};
43,004,490✔
82
    return obj.get_any(m_column_key);
43,004,490✔
83
}
43,004,490✔
84

85
std::vector<ObjKey> ClusterColumn::get_all_keys() const
86
{
18✔
87
    std::vector<ObjKey> ret;
18✔
88
    ret.reserve(m_cluster_tree->size());
18✔
89
    m_cluster_tree->traverse([&ret](const Cluster* cluster) {
18✔
90
        auto sz = cluster->node_size();
18✔
91
        for (size_t i = 0; i < sz; i++) {
198✔
92
            ret.push_back(cluster->get_real_key(i));
180✔
93
        }
180✔
94

9✔
95
        return IteratorControl::AdvanceToNext;
18✔
96
    });
18✔
97
    return ret;
18✔
98
}
18✔
99

100
template <>
101
int64_t IndexArray::from_list<index_FindFirst>(Mixed value, InternalFindResult& /* result_ref */,
102
                                               const IntegerColumn& key_values, const ClusterColumn& column) const
103
{
5,229✔
104
    SortedListComparator slc(column);
5,229✔
105

2,871✔
106
    IntegerColumn::const_iterator it_end = key_values.cend();
5,229✔
107
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
5,229✔
108
    if (lower == it_end)
5,229✔
109
        return null_key.value;
×
110

2,871✔
111
    int64_t first_key_value = *lower;
5,229✔
112

2,871✔
113
    Mixed actual = column.get_value(ObjKey(first_key_value));
5,229✔
114
    if (actual != value)
5,229✔
115
        return null_key.value;
×
116

2,871✔
117
    return first_key_value;
5,229✔
118
}
5,229✔
119

120
template <>
121
int64_t IndexArray::from_list<index_Count>(Mixed value, InternalFindResult& /* result_ref */,
122
                                           const IntegerColumn& key_values, const ClusterColumn& column) const
123
{
8,247✔
124
    SortedListComparator slc(column);
8,247✔
125

4,050✔
126
    IntegerColumn::const_iterator it_end = key_values.cend();
8,247✔
127
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
8,247✔
128
    if (lower == it_end)
8,247✔
129
        return 0;
24✔
130

4,038✔
131
    int64_t first_key_value = *lower;
8,223✔
132

4,038✔
133
    Mixed actual = column.get_value(ObjKey(first_key_value));
8,223✔
134
    if (actual != value)
8,223✔
135
        return 0;
24✔
136

4,026✔
137
    IntegerColumn::const_iterator upper = std::upper_bound(lower, it_end, value, slc);
8,199✔
138
    int64_t cnt = upper - lower;
8,199✔
139

4,026✔
140
    return cnt;
8,199✔
141
}
8,199✔
142

143
template <>
144
int64_t IndexArray::from_list<index_FindAll_nocopy>(Mixed value, InternalFindResult& result_ref,
145
                                                    const IntegerColumn& key_values,
146
                                                    const ClusterColumn& column) const
147
{
2,142✔
148
    SortedListComparator slc(column);
2,142✔
149
    IntegerColumn::const_iterator it_end = key_values.cend();
2,142✔
150
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
2,142✔
151
    if (lower == it_end)
2,142✔
152
        return size_t(FindRes_not_found);
57✔
153

1,023✔
154
    ObjKey first_key = ObjKey(*lower);
2,085✔
155

1,023✔
156
    Mixed actual = column.get_value(ObjKey(first_key));
2,085✔
157
    if (actual != value)
2,085✔
158
        return size_t(FindRes_not_found);
63✔
159

990✔
160
    // Optimization: check the last entry before trying upper bound.
990✔
161
    IntegerColumn::const_iterator upper = it_end;
2,022✔
162
    --upper;
2,022✔
163
    // Single result if upper matches lower
990✔
164
    if (upper == lower) {
2,022✔
165
        result_ref.payload = *lower;
309✔
166
        return size_t(FindRes_single);
309✔
167
    }
309✔
168

843✔
169
    // Check string value at upper, if equal return matches in (lower, upper]
843✔
170
    ObjKey last_key = ObjKey(*upper);
1,713✔
171
    actual = column.get_value(ObjKey(last_key));
1,713✔
172
    if (actual == value) {
1,713✔
173
        result_ref.payload = from_ref(key_values.get_ref());
1,233✔
174
        result_ref.start_ndx = lower.get_position();
1,233✔
175
        result_ref.end_ndx = upper.get_position() + 1; // one past last match
1,233✔
176
        return size_t(FindRes_column);
1,233✔
177
    }
1,233✔
178

231✔
179
    // Last result is not equal, find the upper bound of the range of results.
231✔
180
    // Note that we are passing upper which is cend() - 1 here as we already
231✔
181
    // checked the last item manually.
231✔
182
    upper = std::upper_bound(lower, upper, value, slc);
480✔
183

231✔
184
    result_ref.payload = from_ref(key_values.get_ref());
480✔
185
    result_ref.start_ndx = lower.get_position();
480✔
186
    result_ref.end_ndx = upper.get_position();
480✔
187
    return size_t(FindRes_column);
480✔
188
}
480✔
189

190

191
template <IndexMethod method>
192
int64_t IndexArray::index_string(Mixed value, InternalFindResult& result_ref, const ClusterColumn& column) const
193
{
1,899,627✔
194
    // Return`realm::not_found`, or an index to the (any) match
941,379✔
195
    constexpr bool first(method == index_FindFirst);
1,899,627✔
196
    // Return 0, or the number of items that match the specified `value`
941,379✔
197
    constexpr bool get_count(method == index_Count);
1,899,627✔
198
    // Same as `index_FindAll` but does not copy matching rows into `column`
941,379✔
199
    // returns FindRes_not_found if there are no matches
941,379✔
200
    // returns FindRes_single and the row index (literal) in result_ref.payload
941,379✔
201
    // or returns FindRes_column and the reference to a column of duplicates in
941,379✔
202
    // result_ref.result with the results in the bounds start_ndx, and end_ndx
941,379✔
203
    constexpr bool allnocopy(method == index_FindAll_nocopy);
1,899,627✔
204

941,379✔
205
    constexpr int64_t local_not_found = allnocopy ? int64_t(FindRes_not_found) : first ? null_key.value : 0;
1,899,627✔
206

941,379✔
207
    const char* data = m_data;
1,899,627✔
208
    const char* header;
1,899,627✔
209
    uint_least8_t width = m_width;
1,899,627✔
210
    bool is_inner_node = m_is_inner_bptree_node;
1,899,627✔
211
    typedef StringIndex::key_type key_type;
1,899,627✔
212
    size_t stringoffset = 0;
1,899,627✔
213

941,379✔
214
    StringConversionBuffer buffer;
1,899,627✔
215
    StringData index_data = value.get_index_data(buffer);
1,899,627✔
216

941,379✔
217
    // Create 4 byte index key
941,379✔
218
    key_type key = StringIndex::create_key(index_data, stringoffset);
1,899,627✔
219

941,379✔
220
    for (;;) {
2,402,994✔
221
        // Get subnode table
1,193,565✔
222
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
2,402,994✔
223

1,193,565✔
224
        // Find the position matching the key
1,193,565✔
225
        const char* offsets_header = m_alloc.translate(offsets_ref);
2,402,994✔
226
        const char* offsets_data = get_data_from_header(offsets_header);
2,402,994✔
227
        size_t offsets_size = get_size_from_header(offsets_header);
2,402,994✔
228
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
2,402,994✔
229

1,193,565✔
230
        // If key is outside range, we know there can be no match
1,193,565✔
231
        if (pos == offsets_size)
2,402,994✔
232
            return local_not_found;
840,501✔
233

775,947✔
234
        // Get entry under key
775,947✔
235
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
1,562,493✔
236
        uint64_t ref = get_direct(data, width, pos_refs);
1,562,493✔
237

775,947✔
238
        if (is_inner_node) {
1,562,493✔
239
            // Set vars for next iteration
26,649✔
240
            header = m_alloc.translate(to_ref(ref));
62,253✔
241
            data = get_data_from_header(header);
62,253✔
242
            width = get_width_from_header(header);
62,253✔
243
            is_inner_node = get_is_inner_bptree_node_from_header(header);
62,253✔
244
            continue;
62,253✔
245
        }
62,253✔
246

749,298✔
247
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
1,500,240✔
248

749,298✔
249
        if (stored_key != key)
1,500,240✔
250
            return local_not_found;
182,811✔
251

666,117✔
252
        // Literal row index (tagged)
666,117✔
253
        if (ref & 1) {
1,317,429✔
254
            int64_t key_value = int64_t(ref >> 1);
860,448✔
255

432,519✔
256
            Mixed a = column.is_fulltext() ? reconstruct_string(stringoffset, key, index_data)
432,603✔
257
                                           : column.get_value(ObjKey(key_value));
860,364✔
258
            if (a == value) {
860,448✔
259
                result_ref.payload = key_value;
857,427✔
260
                return first ? key_value : get_count ? 1 : FindRes_single;
849,114✔
261
            }
857,427✔
262
            return local_not_found;
3,021✔
263
        }
3,021✔
264

233,598✔
265
        const char* sub_header = m_alloc.translate(ref_type(ref));
456,981✔
266
        const bool sub_isindex = get_context_flag_from_header(sub_header);
456,981✔
267

233,598✔
268
        // List of row indices with common prefix up to this point, in sorted order.
233,598✔
269
        if (!sub_isindex) {
456,981✔
270
            const IntegerColumn sub(m_alloc, ref_type(ref));
15,906✔
271
            if (column.is_fulltext()) {
15,906✔
272
                result_ref.payload = ref;
288✔
273
                result_ref.start_ndx = 0;
288✔
274
                result_ref.end_ndx = sub.size();
288✔
275
                return FindRes_column;
288✔
276
            }
288✔
277

7,974✔
278
            return from_list<method>(value, result_ref, sub, column);
15,618✔
279
        }
15,618✔
280

225,480✔
281
        // Recurse into sub-index;
225,480✔
282
        header = sub_header;
441,075✔
283
        data = get_data_from_header(header);
441,075✔
284
        width = get_width_from_header(header);
441,075✔
285
        is_inner_node = get_is_inner_bptree_node_from_header(header);
441,075✔
286

225,480✔
287
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
225,480✔
288
        stringoffset += 4;
441,075✔
289

225,480✔
290
        // Update 4 byte index key
225,480✔
291
        key = StringIndex::create_key(index_data, stringoffset);
441,075✔
292
    }
441,075✔
293
}
1,899,627✔
294

295

296
void IndexArray::from_list_all_ins(StringData upper_value, std::vector<ObjKey>& result, const IntegerColumn& rows,
297
                                   const ClusterColumn& column) const
298
{
1,422✔
299
    // optimization for the most common case, where all the strings under a given subindex are equal
711✔
300
    Mixed first = column.get_value(ObjKey(*rows.cbegin()));
1,422✔
301
    Mixed last = column.get_value(ObjKey(*(rows.cend() - 1)));
1,422✔
302
    if (first == last) {
1,422✔
303
        if (!first.is_type(type_String))
102✔
304
            return;
×
305
        auto first_str_upper = case_map(first.get_string(), true);
102✔
306
        if (first_str_upper != upper_value) {
102✔
307
            return;
24✔
308
        }
24✔
309

39✔
310
        size_t sz = result.size() + rows.size();
78✔
311
        result.reserve(sz);
78✔
312
        for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
246✔
313
            result.push_back(ObjKey(*it));
168✔
314
        }
168✔
315
        return;
78✔
316
    }
78✔
317

660✔
318
    // special case for very long strings, where they might have a common prefix and end up in the
660✔
319
    // same subindex column, but still not be identical
660✔
320
    for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
66,792✔
321
        ObjKey key = ObjKey(*it);
65,472✔
322
        Mixed val = column.get_value(key);
65,472✔
323
        if (val.is_type(type_String)) {
65,472✔
324
            auto upper_str = case_map(val.get_string(), true);
65,472✔
325
            if (upper_str == upper_value) {
65,472✔
326
                result.push_back(key);
1,440✔
327
            }
1,440✔
328
        }
65,472✔
329
    }
65,472✔
330

660✔
331
    return;
1,320✔
332
}
1,320✔
333

334

335
void IndexArray::from_list_all(Mixed value, std::vector<ObjKey>& result, const IntegerColumn& rows,
336
                               const ClusterColumn& column) const
337
{
42,213✔
338
    SortedListComparator slc(column);
42,213✔
339

20,661✔
340
    IntegerColumn::const_iterator it_end = rows.cend();
42,213✔
341
    IntegerColumn::const_iterator lower = std::lower_bound(rows.cbegin(), it_end, value, slc);
42,213✔
342
    if (lower == it_end)
42,213✔
343
        return;
×
344

20,661✔
345
    ObjKey key = ObjKey(*lower);
42,213✔
346

20,661✔
347
    Mixed a = column.get_value(key);
42,213✔
348
    if (a != value)
42,213✔
349
        return;
×
350

20,661✔
351
    IntegerColumn::const_iterator upper = std::upper_bound(lower, it_end, value, slc);
42,213✔
352

20,661✔
353
    // Copy all matches into result column
20,661✔
354
    size_t sz = result.size() + (upper - lower);
42,213✔
355
    result.reserve(sz);
42,213✔
356
    for (IntegerColumn::const_iterator it = lower; it != upper; ++it) {
736,776✔
357
        result.push_back(ObjKey(*it));
694,563✔
358
    }
694,563✔
359

20,661✔
360
    return;
42,213✔
361
}
42,213✔
362

363

364
namespace {
365

366
// Helper functions for SearchList (index_string_all_ins) for generating permutations of index keys
367

368
// replicates the 4 least significant bits each times 8
369
// eg: abcd -> aaaaaaaabbbbbbbbccccccccdddddddd
370
uint32_t replicate_4_lsb_x8(uint32_t i)
371
{
3,123,552✔
372
    REALM_ASSERT_DEBUG(i <= 15);
3,123,552✔
373
    i *= 0x204081;
3,123,552✔
374
    i &= 0x1010101;
3,123,552✔
375
    i *= 0xff;
3,123,552✔
376
    return i;
3,123,552✔
377
}
3,123,552✔
378

379
int32_t select_from_mask(int32_t a, int32_t b, int32_t mask) {
3,123,552✔
380
    return a ^ ((a ^ b) & mask);
3,123,552✔
381
}
3,123,552✔
382

383
// Given upper and lower keys: "ABCD" and "abcd", the 4 LSBs in the permutation argument determine the
384
// final key:
385
// Permutation 0  = "ABCD"
386
// Permutation 1  = "ABCd"
387
// Permutation 8  = "aBCD"
388
// Permutation 15 = "abcd"
389
using key_type = StringIndex::key_type;
390
key_type generate_key(key_type upper, key_type lower, int permutation) {
3,123,552✔
391
    return select_from_mask(upper, lower, replicate_4_lsb_x8(permutation));
3,123,552✔
392
}
3,123,552✔
393

394

395
// Helper structure for IndexArray::index_string_all_ins to generate and keep track of search key permutations,
396
// when traversing the trees.
397
struct SearchList {
398
    struct Item {
399
        const char* header;
400
        size_t string_offset;
401
        key_type key;
402
    };
403

404
    SearchList(const util::Optional<std::string>& upper_value, const util::Optional<std::string>& lower_value)
405
        : m_upper_value(upper_value)
406
        , m_lower_value(lower_value)
407
    {
6,576✔
408
        m_keys_seen.reserve(num_permutations);
6,576✔
409
    }
6,576✔
410

411
    // Add all unique keys for this level to the internal work stack
412
    void add_all_for_level(const char* header, size_t string_offset)
413
    {
195,222✔
414
        m_keys_seen.clear();
195,222✔
415
        const key_type upper_key = StringIndex::create_key(m_upper_value, string_offset);
195,222✔
416
        const key_type lower_key = StringIndex::create_key(m_lower_value, string_offset);
195,222✔
417
        for (int p = 0; p < num_permutations; ++p) {
3,318,774✔
418
            // FIXME: This might still be incorrect due to multi-byte unicode characters (crossing the 4 byte key
1,561,776✔
419
            // size) being combined incorrectly.
1,561,776✔
420
            const key_type key = generate_key(upper_key, lower_key, p);
3,123,552✔
421
            const bool new_key = std::find(m_keys_seen.cbegin(), m_keys_seen.cend(), key) == m_keys_seen.cend();
3,123,552✔
422
            if (new_key) {
3,123,552✔
423
                m_keys_seen.push_back(key);
3,059,562✔
424
                add_next(header, string_offset, key);
3,059,562✔
425
            }
3,059,562✔
426
        }
3,123,552✔
427
    }
195,222✔
428

429
    bool empty() const
430
    {
3,066,138✔
431
        return m_items.empty();
3,066,138✔
432
    }
3,066,138✔
433

434
    Item get_next()
435
    {
3,059,562✔
436
        Item item = m_items.back();
3,059,562✔
437
        m_items.pop_back();
3,059,562✔
438
        return item;
3,059,562✔
439
    }
3,059,562✔
440

441
    // Add a single entry to the internal work stack. Used to traverse the inner trees (same key)
442
    void add_next(const char* header, size_t string_offset, key_type key)
443
    {
3,059,562✔
444
        m_items.push_back({header, string_offset, key});
3,059,562✔
445
    }
3,059,562✔
446

447
private:
448
    static constexpr int num_permutations = 1 << sizeof(key_type); // 4 bytes gives up to 16 search keys
449

450
    std::vector<Item> m_items;
451

452
    const util::Optional<std::string> m_upper_value;
453
    const util::Optional<std::string> m_lower_value;
454

455
    std::vector<key_type> m_keys_seen;
456
};
457

458

459
} // namespace
460

461

462
void IndexArray::index_string_all_ins(StringData value, std::vector<ObjKey>& result,
463
                                      const ClusterColumn& column) const
464
{
6,576✔
465
    REALM_ASSERT(!value.is_null());
6,576✔
466

3,288✔
467
    const util::Optional<std::string> upper_value = case_map(value, true);
6,576✔
468
    const util::Optional<std::string> lower_value = case_map(value, false);
6,576✔
469
    SearchList search_list(upper_value, lower_value);
6,576✔
470

3,288✔
471
    const char* top_header = get_header_from_data(m_data);
6,576✔
472
    search_list.add_all_for_level(top_header, 0);
6,576✔
473

3,288✔
474
    while (!search_list.empty()) {
3,066,138✔
475
        SearchList::Item item = search_list.get_next();
3,059,562✔
476

1,529,781✔
477
        const char* const header = item.header;
3,059,562✔
478
        const size_t string_offset = item.string_offset;
3,059,562✔
479
        const key_type key = item.key;
3,059,562✔
480
        const char* const data = get_data_from_header(header);
3,059,562✔
481
        const uint_least8_t width = get_width_from_header(header);
3,059,562✔
482
        const bool is_inner_node = get_is_inner_bptree_node_from_header(header);
3,059,562✔
483

1,529,781✔
484
        // Get subnode table
1,529,781✔
485
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
3,059,562✔
486

1,529,781✔
487
        // Find the position matching the key
1,529,781✔
488
        const char* const offsets_header = m_alloc.translate(offsets_ref);
3,059,562✔
489
        const char* const offsets_data = get_data_from_header(offsets_header);
3,059,562✔
490
        const size_t offsets_size = get_size_from_header(offsets_header);
3,059,562✔
491
        const size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
3,059,562✔
492

1,529,781✔
493
        // If key is outside range, we know there can be no match
1,529,781✔
494
        if (pos == offsets_size)
3,059,562✔
495
            continue;
810✔
496

1,529,376✔
497
        // Get entry under key
1,529,376✔
498
        const size_t pos_refs = pos + 1; // first entry in refs points to offsets
3,058,752✔
499
        const uint64_t ref = get_direct(data, width, pos_refs);
3,058,752✔
500

1,529,376✔
501
        if (is_inner_node) {
3,058,752✔
502
            // Set vars for next iteration
503
            const char* const inner_header = m_alloc.translate(to_ref(ref));
×
504
            search_list.add_next(inner_header, string_offset, key);
×
505
            continue;
×
506
        }
×
507

1,529,376✔
508
        const key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
3,058,752✔
509

1,529,376✔
510
        if (stored_key != key)
3,058,752✔
511
            continue;
2,863,044✔
512

97,854✔
513
        // Literal row index (tagged)
97,854✔
514
        if (ref & 1) {
195,708✔
515
            ObjKey k(int64_t(ref >> 1));
5,640✔
516

2,820✔
517
            // The buffer is needed when for when this is an integer index.
2,820✔
518
            StringConversionBuffer buffer;
5,640✔
519
            const StringData str = column.get_value(k).get_index_data(buffer);
5,640✔
520
            const util::Optional<std::string> upper_str = case_map(str, true);
5,640✔
521
            if (upper_str == upper_value) {
5,640✔
522
                result.push_back(k);
5,592✔
523
            }
5,592✔
524
            continue;
5,640✔
525
        }
5,640✔
526

95,034✔
527
        const char* const sub_header = m_alloc.translate(ref_type(ref));
190,068✔
528
        const bool sub_isindex = get_context_flag_from_header(sub_header);
190,068✔
529

95,034✔
530
        // List of row indices with common prefix up to this point, in sorted order.
95,034✔
531
        if (!sub_isindex) {
190,068✔
532
            const IntegerColumn sub(m_alloc, ref_type(ref));
1,422✔
533
            from_list_all_ins(upper_value, result, sub, column);
1,422✔
534
            continue;
1,422✔
535
        }
1,422✔
536

94,323✔
537
        // Recurse into sub-index;
94,323✔
538
        search_list.add_all_for_level(sub_header, string_offset + 4);
188,646✔
539
    }
188,646✔
540

3,288✔
541
    // sort the result and return a std::vector
3,288✔
542
    std::sort(result.begin(), result.end());
6,576✔
543
}
6,576✔
544

545

546
void IndexArray::index_string_all(Mixed value, std::vector<ObjKey>& result, const ClusterColumn& column) const
547
{
50,007✔
548
    const char* data = m_data;
50,007✔
549
    const char* header;
50,007✔
550
    uint_least8_t width = m_width;
50,007✔
551
    bool is_inner_node = m_is_inner_bptree_node;
50,007✔
552
    size_t stringoffset = 0;
50,007✔
553

24,534✔
554
    StringConversionBuffer buffer;
50,007✔
555
    StringData index_data = value.get_index_data(buffer);
50,007✔
556
    // Create 4 byte index key
24,534✔
557
    key_type key = StringIndex::create_key(index_data, stringoffset);
50,007✔
558

24,534✔
559
    for (;;) {
78,345✔
560
        // Get subnode table
38,721✔
561
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
78,345✔
562

38,721✔
563
        // Find the position matching the key
38,721✔
564
        const char* offsets_header = m_alloc.translate(offsets_ref);
78,345✔
565
        const char* offsets_data = get_data_from_header(offsets_header);
78,345✔
566
        size_t offsets_size = get_size_from_header(offsets_header);
78,345✔
567
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
78,345✔
568

38,721✔
569
        // If key is outside range, we know there can be no match
38,721✔
570
        if (pos == offsets_size)
78,345✔
571
            return;
×
572

38,721✔
573
        // Get entry under key
38,721✔
574
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
78,345✔
575
        uint64_t ref = get_direct(data, width, pos_refs);
78,345✔
576

38,721✔
577
        if (is_inner_node) {
78,345✔
578
            // Set vars for next iteration
579
            header = m_alloc.translate(ref_type(ref));
×
580
            data = get_data_from_header(header);
×
581
            width = get_width_from_header(header);
×
582
            is_inner_node = get_is_inner_bptree_node_from_header(header);
×
583
            continue;
×
584
        }
×
585

38,721✔
586
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
78,345✔
587

38,721✔
588
        if (stored_key != key)
78,345✔
589
            return;
6,120✔
590

35,661✔
591
        // Literal row index (tagged)
35,661✔
592
        if (ref & 1) {
72,225✔
593
            ObjKey k(int64_t(ref >> 1));
1,674✔
594

813✔
595
            Mixed a = column.get_value(k);
1,674✔
596
            if (a == value) {
1,674✔
597
                result.push_back(k);
1,674✔
598
                return;
1,674✔
599
            }
1,674✔
600
            return;
×
601
        }
×
602

34,848✔
603
        const char* sub_header = m_alloc.translate(ref_type(ref));
70,551✔
604
        const bool sub_isindex = get_context_flag_from_header(sub_header);
70,551✔
605

34,848✔
606
        // List of row indices with common prefix up to this point, in sorted order.
34,848✔
607
        if (!sub_isindex) {
70,551✔
608
            const IntegerColumn sub(m_alloc, ref_type(ref));
42,213✔
609
            return from_list_all(value, result, sub, column);
42,213✔
610
        }
42,213✔
611

14,187✔
612
        // Recurse into sub-index;
14,187✔
613
        header = sub_header;
28,338✔
614
        data = get_data_from_header(header);
28,338✔
615
        width = get_width_from_header(header);
28,338✔
616
        is_inner_node = get_is_inner_bptree_node_from_header(header);
28,338✔
617

14,187✔
618
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
14,187✔
619
        stringoffset += 4;
28,338✔
620

14,187✔
621
        // Update 4 byte index key
14,187✔
622
        key = StringIndex::create_key(index_data, stringoffset);
28,338✔
623
    }
28,338✔
624
}
50,007✔
625

626
static void get_all_keys_below(std::set<int64_t>& result, ref_type ref, Allocator& alloc)
627
{
72✔
628
    const char* sub_header = alloc.translate(ref_type(ref));
72✔
629
    const bool sub_isindex = NodeHeader::get_context_flag_from_header(sub_header);
72✔
630

36✔
631
    if (sub_isindex) {
72✔
632
        Array tree(alloc);
36✔
633
        tree.init_from_ref(ref);
36✔
634
        auto sz = tree.size();
36✔
635
        for (size_t n = 1; n < sz; n++) {
90✔
636
            auto rot = tree.get_as_ref_or_tagged(n);
54✔
637
            // Literal row index (tagged)
27✔
638
            if (rot.is_tagged()) {
54✔
639
                result.insert(rot.get_as_int());
30✔
640
            }
30✔
641
            else {
24✔
642
                get_all_keys_below(result, rot.get_as_ref(), alloc);
24✔
643
            }
24✔
644
        }
54✔
645
    }
36✔
646
    else {
36✔
647
        IntegerColumn tree(alloc, ref);
36✔
648
        tree.for_all([&result](int64_t i) {
84✔
649
            result.insert(i);
84✔
650
        });
84✔
651
    }
36✔
652
}
72✔
653

654
void IndexArray::_index_string_find_all_prefix(std::set<int64_t>& result, StringData str, const char* header) const
655
{
66✔
656
    size_t stringoffset = 0;
66✔
657

33✔
658
    for (;;) {
90✔
659
        const char* data = NodeHeader::get_data_from_header(header);
90✔
660
        uint_least8_t width = get_width_from_header(header);
90✔
661

45✔
662
        // Create 4 byte lower and upper key
45✔
663
        key_type lower = 0;
90✔
664
        size_t i = 0;
90✔
665
        size_t n = str.size() - stringoffset;
90✔
666
        bool is_at_string_end = (n <= 4);
90✔
667
        if (!is_at_string_end) {
90✔
668
            n = 4;
30✔
669
        }
30✔
670
        while (i < n) {
360✔
671
            lower <<= 8;
270✔
672
            lower += str[stringoffset + i++];
270✔
673
        }
270✔
674
        size_t shift = (4 - i) * 8;
90✔
675
        key_type upper = lower + 1;
90✔
676
        lower <<= shift;
90✔
677
        upper <<= shift;
90✔
678
        upper--;
90✔
679

45✔
680
        // Get index array
45✔
681
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
90✔
682

45✔
683
        // Find the position matching the key
45✔
684
        const char* offsets_header = m_alloc.translate(offsets_ref);
90✔
685
        const char* offsets_data = get_data_from_header(offsets_header);
90✔
686
        size_t offsets_size = get_size_from_header(offsets_header);
90✔
687
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, lower); // keys are always 32 bits wide
90✔
688
        // If key is outside range, we know there can be no match
45✔
689
        if (pos == offsets_size)
90✔
690
            return;
×
691

45✔
692
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
90✔
693

45✔
694
        if (NodeHeader::get_is_inner_bptree_node_from_header(header)) {
90✔
695
            bool done = false;
×
696
            while (!done) {
×
697
                // Recursively call with child node
698
                const char* header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs++)));
×
699
                _index_string_find_all_prefix(result, str, header);
×
700

701
                // Check if current node is past end of key range or last node
702
                auto key = get_direct<32>(offsets_data, pos++);
×
703
                done = key > upper || pos == offsets_size;
×
704
            }
×
705
            return;
×
706
        }
×
707

45✔
708
        size_t end = ::upper_bound<32>(offsets_data, offsets_size, upper); // keys are always 32 bits wide
90✔
709

45✔
710
        if (pos == end) {
90✔
711
            // No match
9✔
712
            return;
18✔
713
        }
18✔
714

36✔
715
        if (is_at_string_end) {
72✔
716
            // now get all entries from start to end
24✔
717
            for (size_t ndx = pos_refs; ndx <= end; ndx++) {
108✔
718
                uint64_t ref = get_direct(data, width, ndx);
60✔
719
                // Literal row index (tagged)
30✔
720
                if (ref & 1) {
60✔
721
                    result.emplace(int64_t(ref >> 1));
12✔
722
                }
12✔
723
                else {
48✔
724
                    get_all_keys_below(result, to_ref(ref), m_alloc);
48✔
725
                }
48✔
726
            }
60✔
727
            return;
48✔
728
        }
48✔
729

12✔
730
        // When we are not at end of string then we are comparing against the whole key
12✔
731
        // and we can have at most one match
12✔
732
        REALM_ASSERT(end == pos + 1);
24✔
733
        header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs)));
24✔
734
        stringoffset += 4;
24✔
735
    }
24✔
736
}
66✔
737

738
ObjKey IndexArray::index_string_find_first(Mixed value, const ClusterColumn& column) const
739
{
1,229,658✔
740
    InternalFindResult unused;
1,229,658✔
741
    return ObjKey(index_string<index_FindFirst>(value, unused, column));
1,229,658✔
742
}
1,229,658✔
743

744

745
void IndexArray::index_string_find_all(std::vector<ObjKey>& result, Mixed value, const ClusterColumn& column,
746
                                       bool case_insensitive) const
747
{
56,583✔
748
    if (case_insensitive && value.is_type(type_String)) {
56,583✔
749
        index_string_all_ins(value.get_string(), result, column);
6,576✔
750
    }
6,576✔
751
    else {
50,007✔
752
        index_string_all(value, result, column);
50,007✔
753
    }
50,007✔
754
}
56,583✔
755

756
FindRes IndexArray::index_string_find_all_no_copy(Mixed value, const ClusterColumn& column,
757
                                                  InternalFindResult& result) const
758
{
657,099✔
759
    return static_cast<FindRes>(index_string<index_FindAll_nocopy>(value, result, column));
657,099✔
760
}
657,099✔
761

762
size_t IndexArray::index_string_count(Mixed value, const ClusterColumn& column) const
763
{
12,864✔
764
    InternalFindResult unused;
12,864✔
765
    return to_size_t(index_string<index_Count>(value, unused, column));
12,864✔
766
}
12,864✔
767

768
std::unique_ptr<IndexArray> StringIndex::create_node(Allocator& alloc, bool is_leaf)
769
{
141,798✔
770
    Array::Type type = is_leaf ? Array::type_HasRefs : Array::type_InnerBptreeNode;
141,762✔
771
    std::unique_ptr<IndexArray> top(new IndexArray(alloc)); // Throws
141,798✔
772
    top->create(type);                                      // Throws
141,798✔
773

68,955✔
774
    // Mark that this is part of index
68,955✔
775
    // (as opposed to columns under leaves)
68,955✔
776
    top->set_context_flag(true);
141,798✔
777

68,955✔
778
    // Add subcolumns for leaves
68,955✔
779
    Array values(alloc);
141,798✔
780
    values.create(Array::type_Normal);       // Throws
141,798✔
781
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
141,798✔
782
    top->add(values.get_ref());              // first entry in refs points to offsets
141,798✔
783

68,955✔
784
    return top;
141,798✔
785
}
141,798✔
786

787
void StringIndex::set_target(const ClusterColumn& target_column) noexcept
788
{
×
789
    m_target_column = target_column;
×
790
}
×
791

792

793
StringIndex::key_type StringIndex::get_last_key() const
794
{
59,727✔
795
    Array offsets(m_array->get_alloc());
59,727✔
796
    get_child(*m_array, 0, offsets);
59,727✔
797
    return key_type(offsets.back());
59,727✔
798
}
59,727✔
799

800

801
void StringIndex::insert_with_offset(ObjKey obj_key, StringData index_data, const Mixed& value, size_t offset)
802
{
9,115,992✔
803
    // Create 4 byte index key
4,535,310✔
804
    key_type key = create_key(index_data, offset);
9,115,992✔
805
    TreeInsert(obj_key, key, offset, index_data, value); // Throws
9,115,992✔
806
}
9,115,992✔
807

808
void StringIndex::insert_to_existing_list_at_lower(ObjKey key, Mixed value, IntegerColumn& list,
809
                                                   const IntegerColumnIterator& lower)
810
{
2,068,479✔
811
    SortedListComparator slc(m_target_column);
2,068,479✔
812
    // At this point there exists duplicates of this value, we need to
1,035,927✔
813
    // insert value beside it's duplicates so that rows are also sorted
1,035,927✔
814
    // in ascending order.
1,035,927✔
815
    IntegerColumn::const_iterator upper = std::upper_bound(lower, list.cend(), value, slc);
2,068,479✔
816
    // find insert position (the list has to be kept in sorted order)
1,035,927✔
817
    // In most cases the refs will be added to the end. So we test for that
1,035,927✔
818
    // first to see if we can avoid the binary search for insert position
1,035,927✔
819
    IntegerColumn::const_iterator last = upper - ptrdiff_t(1);
2,068,479✔
820
    int64_t last_key_value = *last;
2,068,479✔
821
    if (key.value >= last_key_value) {
2,068,479✔
822
        list.insert(upper.get_position(), key.value);
2,032,458✔
823
    }
2,032,458✔
824
    else {
36,021✔
825
        IntegerColumn::const_iterator inner_lower = std::lower_bound(lower, upper, key.value);
36,021✔
826
        list.insert(inner_lower.get_position(), key.value);
36,021✔
827
    }
36,021✔
828
}
2,068,479✔
829

830
void StringIndex::insert_to_existing_list(ObjKey key, Mixed value, IntegerColumn& list)
831
{
1,941✔
832
    SortedListComparator slc(m_target_column);
1,941✔
833
    IntegerColumn::const_iterator it_end = list.cend();
1,941✔
834
    IntegerColumn::const_iterator lower = std::lower_bound(list.cbegin(), it_end, value, slc);
1,941✔
835

1,023✔
836
    if (lower == it_end) {
1,941✔
837
        // Not found and everything is less, just append it to the end.
744✔
838
        list.add(key.value);
1,461✔
839
    }
1,461✔
840
    else {
480✔
841
        ObjKey lower_key = ObjKey(*lower);
480✔
842
        Mixed lower_value = get(lower_key);
480✔
843

279✔
844
        if (lower_value != value) {
480✔
845
            list.insert(lower.get_position(), key.value);
480✔
846
        }
480✔
847
        else {
×
848
            // At this point there exists duplicates of this value, we need to
849
            // insert value beside it's duplicates so that rows are also sorted
850
            // in ascending order.
851
            insert_to_existing_list_at_lower(key, value, list, lower);
×
852
        }
×
853
    }
480✔
854
}
1,941✔
855

856

857
void StringIndex::insert_row_list(size_t ref, size_t offset, StringData index_data)
858
{
2,271✔
859
    REALM_ASSERT(!m_array->is_inner_bptree_node()); // only works in leaves
2,271✔
860

1,344✔
861
    // Create 4 byte index key
1,344✔
862
    key_type key = create_key(index_data, offset);
2,271✔
863

1,344✔
864
    // Get subnode table
1,344✔
865
    Allocator& alloc = m_array->get_alloc();
2,271✔
866
    Array values(alloc);
2,271✔
867
    get_child(*m_array, 0, values);
2,271✔
868
    REALM_ASSERT(m_array->size() == values.size() + 1);
2,271✔
869

1,344✔
870
    size_t ins_pos = values.lower_bound_int(key);
2,271✔
871
    if (ins_pos == values.size()) {
2,271✔
872
        // When key is outside current range, we can just add it
1,344✔
873
        values.add(key);
2,271✔
874
        m_array->add(ref);
2,271✔
875
        return;
2,271✔
876
    }
2,271✔
877

878
#ifdef REALM_DEBUG // LCOV_EXCL_START ignore debug code
×
879
    // Since we only use this for moving existing values to new
880
    // subindexes, there should never be an existing match.
881
    key_type k = key_type(values.get(ins_pos));
×
882
    REALM_ASSERT(k != key);
×
883
#endif // LCOV_EXCL_STOP ignore debug code
×
884

885
    // If key is not present we add it at the correct location
886
    values.insert(ins_pos, key);
×
887
    m_array->insert(ins_pos + 1, ref);
×
888
}
×
889

890

891
void StringIndex::TreeInsert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value)
892
{
9,121,572✔
893
    NodeChange nc = do_insert(obj_key, key, offset, index_data, value);
9,121,572✔
894
    switch (nc.type) {
9,121,572✔
895
        case NodeChange::change_None:
9,127,821✔
896
            return;
9,127,821✔
897
        case NodeChange::change_InsertBefore: {
✔
898
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
×
899
            new_node.node_add_key(nc.ref1);
×
900
            new_node.node_add_key(get_ref());
×
901
            m_array->init_from_ref(new_node.get_ref());
×
902
            m_array->update_parent();
×
903
            return;
×
904
        }
×
905
        case NodeChange::change_InsertAfter: {
6✔
906
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
6✔
907
            new_node.node_add_key(get_ref());
6✔
908
            new_node.node_add_key(nc.ref1);
6✔
909
            m_array->init_from_ref(new_node.get_ref());
6✔
910
            m_array->update_parent();
6✔
911
            return;
6✔
912
        }
×
913
        case NodeChange::change_Split: {
63✔
914
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
63✔
915
            new_node.node_add_key(nc.ref1);
63✔
916
            new_node.node_add_key(nc.ref2);
63✔
917
            m_array->init_from_ref(new_node.get_ref());
63✔
918
            m_array->update_parent();
63✔
919
            return;
63✔
920
        }
×
921
    }
×
922
    REALM_ASSERT(false); // LCOV_EXCL_LINE; internal Realm error
923
}
×
924

925

926
StringIndex::NodeChange StringIndex::do_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data,
927
                                               const Mixed& value)
928
{
9,181,581✔
929
    Allocator& alloc = m_array->get_alloc();
9,181,581✔
930
    if (m_array->is_inner_bptree_node()) {
9,181,581✔
931
        // Get subnode table
23,046✔
932
        Array keys(alloc);
54,774✔
933
        get_child(*m_array, 0, keys);
54,774✔
934
        REALM_ASSERT(m_array->size() == keys.size() + 1);
54,774✔
935

23,046✔
936
        // Find the subnode containing the item
23,046✔
937
        size_t node_ndx = keys.lower_bound_int(key);
54,774✔
938
        if (node_ndx == keys.size()) {
54,774✔
939
            // node can never be empty, so try to fit in last item
9,303✔
940
            node_ndx = keys.size() - 1;
18,645✔
941
        }
18,645✔
942

23,046✔
943
        // Get sublist
23,046✔
944
        size_t refs_ndx = node_ndx + 1; // first entry in refs points to offsets
54,774✔
945
        ref_type ref = m_array->get_as_ref(refs_ndx);
54,774✔
946
        StringIndex target(ref, m_array.get(), refs_ndx, m_target_column, alloc);
54,774✔
947

23,046✔
948
        // Insert item
23,046✔
949
        NodeChange nc = target.do_insert(obj_key, key, offset, index_data, value);
54,774✔
950
        if (nc.type == NodeChange::change_None) {
54,774✔
951
            // update keys
23,028✔
952
            key_type last_key = target.get_last_key();
54,732✔
953
            keys.set(node_ndx, last_key);
54,732✔
954
            return NodeChange::change_None; // no new nodes
54,732✔
955
        }
54,732✔
956

18✔
957
        if (nc.type == NodeChange::change_InsertAfter) {
42✔
958
            ++node_ndx;
18✔
959
            ++refs_ndx;
18✔
960
        }
18✔
961

18✔
962
        // If there is room, just update node directly
18✔
963
        if (keys.size() < REALM_MAX_BPNODE_SIZE) {
42✔
964
            if (nc.type == NodeChange::change_Split) {
42✔
965
                node_insert_split(node_ndx, nc.ref2);
24✔
966
            }
24✔
967
            else {
18✔
968
                node_insert(node_ndx, nc.ref1); // ::INSERT_BEFORE/AFTER
18✔
969
            }
18✔
970
            return NodeChange::change_None;
42✔
971
        }
42✔
972

973
        // Else create new node
974
        StringIndex new_node(inner_node_tag(), alloc);
×
975
        if (nc.type == NodeChange::change_Split) {
×
976
            // update offset for left node
977
            key_type last_key = target.get_last_key();
×
978
            keys.set(node_ndx, last_key);
×
979

980
            new_node.node_add_key(nc.ref2);
×
981
            ++node_ndx;
×
982
            ++refs_ndx;
×
983
        }
×
984
        else {
×
985
            new_node.node_add_key(nc.ref1);
×
986
        }
×
987

988
        switch (node_ndx) {
×
989
            case 0: // insert before
×
990
                return NodeChange(NodeChange::change_InsertBefore, new_node.get_ref());
×
991
            case REALM_MAX_BPNODE_SIZE: // insert after
×
992
                if (nc.type == NodeChange::change_Split)
×
993
                    return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
×
994
                return NodeChange(NodeChange::change_InsertAfter, new_node.get_ref());
×
995
            default: // split
×
996
                // Move items after split to new node
997
                size_t len = m_array->size();
×
998
                for (size_t i = refs_ndx; i < len; ++i) {
×
999
                    ref_type ref_i = m_array->get_as_ref(i);
×
1000
                    new_node.node_add_key(ref_i);
×
1001
                }
×
1002
                keys.truncate(node_ndx);
×
1003
                m_array->truncate(refs_ndx);
×
1004
                return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
×
1005
        }
9,126,807✔
1006
    }
9,126,807✔
1007
    else {
9,126,807✔
1008
        // Is there room in the list?
4,545,012✔
1009
        Array old_keys(alloc);
9,126,807✔
1010
        get_child(*m_array, 0, old_keys);
9,126,807✔
1011
        const size_t old_offsets_size = old_keys.size();
9,126,807✔
1012
        REALM_ASSERT_EX(m_array->size() == old_offsets_size + 1, m_array->size(), old_offsets_size + 1);
9,126,807✔
1013

4,545,012✔
1014
        bool noextend = old_offsets_size >= REALM_MAX_BPNODE_SIZE;
9,126,807✔
1015

4,545,012✔
1016
        // See if we can fit entry into current leaf
4,545,012✔
1017
        // Works if there is room or it can join existing entries
4,545,012✔
1018
        if (leaf_insert(obj_key, key, offset, index_data, value, noextend))
9,126,807✔
1019
            return NodeChange::change_None;
9,129,519✔
1020

2,147,483,647✔
1021
        // Create new list for item (a leaf)
2,147,483,647✔
1022
        StringIndex new_list(m_target_column, alloc);
2,147,483,866✔
1023

2,147,483,647✔
1024
        new_list.leaf_insert(obj_key, key, offset, index_data, value);
2,147,483,866✔
1025

2,147,483,647✔
1026
        size_t ndx = old_keys.lower_bound_int(key);
2,147,483,866✔
1027

2,147,483,647✔
1028
        // insert before
2,147,483,647✔
1029
        if (ndx == 0)
2,147,483,866✔
1030
            return NodeChange(NodeChange::change_InsertBefore, new_list.get_ref());
×
1031

2,147,483,647✔
1032
        // insert after
2,147,483,647✔
1033
        if (ndx == old_offsets_size)
2,147,483,866✔
1034
            return NodeChange(NodeChange::change_InsertAfter, new_list.get_ref());
24✔
1035

2,147,483,647✔
1036
        // split
2,147,483,647✔
1037
        Array new_keys(alloc);
2,147,483,854✔
1038
        get_child(*new_list.m_array, 0, new_keys);
2,147,483,854✔
1039
        // Move items after split to new list
2,147,483,647✔
1040
        for (size_t i = ndx; i < old_offsets_size; ++i) {
2,147,507,617✔
1041
            int64_t v2 = old_keys.get(i);
42,033✔
1042
            int64_t v3 = m_array->get(i + 1);
42,033✔
1043

18,270✔
1044
            new_keys.add(v2);
42,033✔
1045
            new_list.m_array->add(v3);
42,033✔
1046
        }
42,033✔
1047
        old_keys.truncate(ndx);
2,147,483,854✔
1048
        m_array->truncate(ndx + 1);
2,147,483,854✔
1049

2,147,483,647✔
1050
        return NodeChange(NodeChange::change_Split, get_ref(), new_list.get_ref());
2,147,483,854✔
1051
    }
2,147,483,854✔
1052
}
9,181,581✔
1053

1054

1055
void StringIndex::node_insert_split(size_t ndx, size_t new_ref)
1056
{
24✔
1057
    REALM_ASSERT(m_array->is_inner_bptree_node());
24✔
1058
    REALM_ASSERT(new_ref);
24✔
1059

9✔
1060
    Allocator& alloc = m_array->get_alloc();
24✔
1061
    Array offsets(alloc);
24✔
1062
    get_child(*m_array, 0, offsets);
24✔
1063

9✔
1064
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
24✔
1065
    REALM_ASSERT(ndx < offsets.size());
24✔
1066
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
24✔
1067

9✔
1068
    // Get sublists
9✔
1069
    size_t refs_ndx = ndx + 1; // first entry in refs points to offsets
24✔
1070
    ref_type orig_ref = m_array->get_as_ref(refs_ndx);
24✔
1071
    StringIndex orig_col(orig_ref, m_array.get(), refs_ndx, m_target_column, alloc);
24✔
1072
    StringIndex new_col(new_ref, nullptr, 0, m_target_column, alloc);
24✔
1073

9✔
1074
    // Update original key
9✔
1075
    key_type last_key = orig_col.get_last_key();
24✔
1076
    offsets.set(ndx, last_key);
24✔
1077

9✔
1078
    // Insert new ref
9✔
1079
    key_type new_key = new_col.get_last_key();
24✔
1080
    offsets.insert(ndx + 1, new_key);
24✔
1081
    m_array->insert(ndx + 2, new_ref);
24✔
1082
}
24✔
1083

1084

1085
void StringIndex::node_insert(size_t ndx, size_t ref)
1086
{
18✔
1087
    REALM_ASSERT(ref);
18✔
1088
    REALM_ASSERT(m_array->is_inner_bptree_node());
18✔
1089

9✔
1090
    Allocator& alloc = m_array->get_alloc();
18✔
1091
    Array offsets(alloc);
18✔
1092
    get_child(*m_array, 0, offsets);
18✔
1093
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
18✔
1094

9✔
1095
    REALM_ASSERT(ndx <= offsets.size());
18✔
1096
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
18✔
1097

9✔
1098
    StringIndex col(ref, nullptr, 0, m_target_column, alloc);
18✔
1099
    key_type last_key = col.get_last_key();
18✔
1100

9✔
1101
    offsets.insert(ndx, last_key);
18✔
1102
    m_array->insert(ndx + 1, ref);
18✔
1103
}
18✔
1104

1105
bool StringIndex::leaf_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value,
1106
                              bool noextend)
1107
{
9,105,141✔
1108
    REALM_ASSERT(!m_array->is_inner_bptree_node());
9,105,141✔
1109

4,527,522✔
1110
    // Get subnode table
4,527,522✔
1111
    Allocator& alloc = m_array->get_alloc();
9,105,141✔
1112
    Array keys(alloc);
9,105,141✔
1113
    get_child(*m_array, 0, keys);
9,105,141✔
1114
    REALM_ASSERT(m_array->size() == keys.size() + 1);
9,105,141✔
1115

4,527,522✔
1116
    // If we are keeping the complete string in the index
4,527,522✔
1117
    // we want to know if this is the last part
4,527,522✔
1118
    bool is_at_string_end = offset + 4 >= index_data.size();
9,105,141✔
1119

4,527,522✔
1120
    size_t ins_pos = keys.lower_bound_int(key);
9,105,141✔
1121
    size_t ins_pos_refs = ins_pos + 1; // first entry in refs points to offsets
9,105,141✔
1122

4,527,522✔
1123
    if (ins_pos == keys.size()) {
9,105,141✔
1124
        if (noextend)
274,767✔
1125
            return false;
24✔
1126

132,612✔
1127
        // When key is outside current range, we can just add it
132,612✔
1128
        keys.add(key);
274,743✔
1129
        if (!m_target_column.is_fulltext() || is_at_string_end) {
274,743✔
1130
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
273,933✔
1131
            m_array->add(shifted);
273,933✔
1132
        }
273,933✔
1133
        else {
810✔
1134
            // create subindex for rest of string
411✔
1135
            StringIndex subindex(m_target_column, m_array->get_alloc());
810✔
1136
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
810✔
1137
            m_array->add(subindex.get_ref());
810✔
1138
        }
810✔
1139
        return true;
274,743✔
1140
    }
274,743✔
1141

4,394,898✔
1142
    key_type k = key_type(keys.get(ins_pos));
8,830,374✔
1143

4,394,898✔
1144
    // If key is not present we add it at the correct location
4,394,898✔
1145
    if (k != key) {
8,830,374✔
1146
        if (noextend)
636,636✔
1147
            return false;
87✔
1148

308,622✔
1149
        keys.insert(ins_pos, key);
636,549✔
1150
        if (!m_target_column.is_fulltext() || is_at_string_end) {
636,549✔
1151
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
634,947✔
1152
            m_array->insert(ins_pos_refs, shifted);
634,947✔
1153
        }
634,947✔
1154
        else {
1,602✔
1155
            // create subindex for rest of string
765✔
1156
            StringIndex subindex(m_target_column, m_array->get_alloc());
1,602✔
1157
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
1,602✔
1158
            m_array->insert(ins_pos_refs, subindex.get_ref());
1,602✔
1159
        }
1,602✔
1160
        return true;
636,549✔
1161
    }
636,549✔
1162

4,086,240✔
1163
    // This leaf already has a slot for for the key
4,086,240✔
1164

4,086,240✔
1165
    uint64_t slot_value = uint64_t(m_array->get(ins_pos_refs));
8,193,738✔
1166
    size_t suboffset = offset + s_index_key_length;
8,193,738✔
1167

4,086,240✔
1168
    // Single match (lowest bit set indicates literal row_ndx)
4,086,240✔
1169
    if ((slot_value & 1) != 0) {
8,193,738✔
1170
        ObjKey obj_key2 = ObjKey(int64_t(slot_value >> 1));
50,025✔
1171
        Mixed v2 = m_target_column.is_fulltext() ? reconstruct_string(offset, key, index_data) : get(obj_key2);
49,743✔
1172
        if (v2 == value) {
50,025✔
1173
            // Strings are equal but this is not a list.
4,671✔
1174
            // Create a list and add both rows.
4,671✔
1175

4,671✔
1176
            // convert to list (in sorted order)
4,671✔
1177
            Array row_list(alloc);
9,432✔
1178
            row_list.create(Array::type_Normal); // Throws
9,432✔
1179
            row_list.add(obj_key < obj_key2 ? obj_key.value : obj_key2.value);
8,757✔
1180
            row_list.add(obj_key < obj_key2 ? obj_key2.value : obj_key.value);
8,757✔
1181
            m_array->set(ins_pos_refs, row_list.get_ref());
9,432✔
1182
        }
9,432✔
1183
        else {
40,593✔
1184
            StringConversionBuffer buffer;
40,593✔
1185
            auto index_data_2 = v2.get_index_data(buffer);
40,593✔
1186
            if (index_data == index_data_2 || suboffset > s_max_offset) {
40,593✔
1187
                // These strings have the same prefix up to this point but we
249✔
1188
                // don't want to recurse further, create a list in sorted order.
249✔
1189
                bool row_ndx_first = value.compare_signed(v2) < 0;
522✔
1190
                Array row_list(alloc);
522✔
1191
                row_list.create(Array::type_Normal); // Throws
522✔
1192
                row_list.add(row_ndx_first ? obj_key.value : obj_key2.value);
393✔
1193
                row_list.add(row_ndx_first ? obj_key2.value : obj_key.value);
393✔
1194
                m_array->set(ins_pos_refs, row_list.get_ref());
522✔
1195
            }
522✔
1196
            else {
40,071✔
1197
                // These strings have the same prefix up to this point but they
18,354✔
1198
                // are actually not equal. Extend the tree recursivly until the
18,354✔
1199
                // prefix of these strings is different.
18,354✔
1200
                StringIndex subindex(m_target_column, m_array->get_alloc());
40,071✔
1201
                subindex.insert_with_offset(obj_key2, index_data_2, v2, suboffset);
40,071✔
1202
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
40,071✔
1203
                // Join the string of SubIndices to the current position of m_array
18,354✔
1204
                m_array->set(ins_pos_refs, subindex.get_ref());
40,071✔
1205
            }
40,071✔
1206
        }
40,593✔
1207
        return true;
50,025✔
1208
    }
50,025✔
1209

4,062,966✔
1210
    // If there already is a list of matches, we see if we fit there
4,062,966✔
1211
    // or it has to be split into a subindex
4,062,966✔
1212
    ref_type ref = ref_type(slot_value);
8,143,713✔
1213
    char* header = alloc.translate(ref);
8,143,713✔
1214
    if (!Array::get_context_flag_from_header(header)) {
8,143,713✔
1215
        IntegerColumn sub(alloc, ref); // Throws
2,071,833✔
1216
        sub.set_parent(m_array.get(), ins_pos_refs);
2,071,833✔
1217

1,038,099✔
1218
        IntegerColumn::const_iterator it_end = sub.cend();
2,071,833✔
1219
        IntegerColumn::const_iterator lower = it_end;
2,071,833✔
1220

1,038,099✔
1221
        auto value_exists_in_list = [&]() {
2,072,775✔
1222
            if (m_target_column.is_fulltext()) {
2,072,775✔
1223
                return reconstruct_string(offset, key, index_data) == value.get_string();
348✔
1224
            }
348✔
1225
            SortedListComparator slc(m_target_column);
2,072,427✔
1226
            lower = std::lower_bound(sub.cbegin(), it_end, value, slc);
2,072,427✔
1227

1,038,336✔
1228
            if (lower != it_end) {
2,072,427✔
1229
                Mixed lower_value = get(ObjKey(*lower));
2,069,463✔
1230
                if (lower_value == value) {
2,069,463✔
1231
                    return true;
2,068,143✔
1232
                }
2,068,143✔
1233
            }
4,284✔
1234
            return false;
4,284✔
1235
        };
4,284✔
1236

1,038,099✔
1237
        // If we found the value in this list, add the duplicate to the list.
1,038,099✔
1238
        if (value_exists_in_list()) {
2,071,833✔
1239
            insert_to_existing_list_at_lower(obj_key, value, sub, lower);
2,068,449✔
1240
        }
2,068,449✔
1241
        else {
3,384✔
1242
            // If the list only stores duplicates we are free to branch and
2,184✔
1243
            // and create a sub index with this existing list as one of the
2,184✔
1244
            // leafs, but if the list doesn't only contain duplicates we
2,184✔
1245
            // must respect that we store a common key prefix up to this
2,184✔
1246
            // point and insert into the existing list.
2,184✔
1247
            ObjKey key_of_any_dup = ObjKey(sub.get(0));
3,384✔
1248
            StringConversionBuffer buffer;
3,384✔
1249
            StringData index_data_2 = m_target_column.is_fulltext() ? reconstruct_string(offset, key, index_data)
2,187✔
1250
                                                                    : get(key_of_any_dup).get_index_data(buffer);
3,381✔
1251
            if (index_data == index_data_2 || suboffset > s_max_offset) {
4,206✔
1252
                insert_to_existing_list(obj_key, value, sub);
1,941✔
1253
            }
1,941✔
1254
            else {
1,443✔
1255
#ifdef REALM_DEBUG
1,443✔
1256
                bool contains_only_duplicates = true;
1,443✔
1257
                if (!m_target_column.is_fulltext() && sub.size() > 1) {
2,265✔
1258
                    ObjKey first_key = ObjKey(sub.get(0));
1,764✔
1259
                    ObjKey last_key = ObjKey(sub.back());
1,764✔
1260
                    auto first = get(first_key);
1,764✔
1261
                    auto last = get(last_key);
1,764✔
1262
                    // Since the list is kept in sorted order, the first and
996✔
1263
                    // last values will be the same only if the whole list is
996✔
1264
                    // storing duplicate values.
996✔
1265
                    if (first != last) {
1,764✔
1266
                        contains_only_duplicates = false; // LCOV_EXCL_LINE
1267
                    }
×
1268
                }
1,764✔
1269
                REALM_ASSERT_DEBUG(contains_only_duplicates);
1,443✔
1270
#endif
1,443✔
1271
                // The buffer is needed for when this is an integer index.
1,161✔
1272
                StringIndex subindex(m_target_column, m_array->get_alloc());
1,443✔
1273
                subindex.insert_row_list(sub.get_ref(), suboffset, index_data_2);
1,443✔
1274
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
1,443✔
1275
                m_array->set(ins_pos_refs, subindex.get_ref());
1,443✔
1276
            }
1,443✔
1277
        }
3,384✔
1278
        return true;
2,071,833✔
1279
    }
2,071,833✔
1280

3,024,867✔
1281
    // The key matches, but there is a subindex here so go down a level in the tree.
3,024,867✔
1282
    StringIndex subindex(ref, m_array.get(), ins_pos_refs, m_target_column, alloc);
6,071,880✔
1283
    subindex.insert_with_offset(obj_key, index_data, value, suboffset);
6,071,880✔
1284

3,024,867✔
1285
    return true;
6,071,880✔
1286
}
6,071,880✔
1287

1288
Mixed StringIndex::get(ObjKey key) const
1289
{
3,612,405✔
1290
    return m_target_column.get_value(key);
3,612,405✔
1291
}
3,612,405✔
1292

1293
void StringIndex::erase(ObjKey key)
1294
{
827,706✔
1295
    StringConversionBuffer buffer;
827,706✔
1296
    std::string_view value{(get(key).get_index_data(buffer))};
827,706✔
1297
    if (m_target_column.is_fulltext()) {
827,706✔
1298
        auto words = Tokenizer::get_instance()->reset(value).get_all_tokens();
72✔
1299
        for (auto& w : words) {
2,460✔
1300
            erase_string(key, w);
2,460✔
1301
        }
2,460✔
1302
    }
72✔
1303
    else {
827,634✔
1304
        erase_string(key, value);
827,634✔
1305
    }
827,634✔
1306
}
827,706✔
1307

1308
namespace {
1309
template <typename T>
1310
void intersect(std::vector<ObjKey>& result, T& keys)
1311
{
294✔
1312
    if (result.empty()) {
294✔
1313
        result.reserve(keys.size());
198✔
1314
        for (auto k : keys) {
498✔
1315
            result.emplace_back(k);
498✔
1316
        }
498✔
1317
    }
198✔
1318
    else {
96✔
1319
        auto it = result.begin();
96✔
1320
        auto keep = it;
96✔
1321
        auto m = keys.begin();
96✔
1322

48✔
1323
        // only keep intersection
48✔
1324
        while (it != result.end() && m != keys.end()) {
354✔
1325
            int64_t int_val = *m;
258✔
1326
            if (it->value < int_val) {
258✔
1327
                it++; // don't keep if match is not in new set
18✔
1328
            }
18✔
1329
            else if (it->value > int_val) {
240✔
1330
                ++m; // ignore new matches
102✔
1331
            }
102✔
1332
            else {
138✔
1333
                // Found both places - make sure it is kept
69✔
1334
                if (keep < it)
138✔
1335
                    *keep = *it;
6✔
1336
                ++keep;
138✔
1337
                ++it;
138✔
1338
                ++m;
138✔
1339
            }
138✔
1340
        }
258✔
1341
        if (keep != result.end()) {
96✔
1342
            result.erase(keep, result.end());
24✔
1343
        }
24✔
1344
    }
96✔
1345
}
294✔
1346

1347
struct FindResWrapper {
1348
    InternalFindResult& res;
1349
    IntegerColumn& indexes;
1350
    size_t n = 0;
1351
    size_t size()
1352
    {
144✔
1353
        return res.end_ndx - res.start_ndx;
144✔
1354
    }
144✔
1355
    auto begin()
1356
    {
228✔
1357
        return indexes.cbegin();
228✔
1358
    }
228✔
1359
    auto end()
1360
    {
384✔
1361
        return indexes.cend();
384✔
1362
    }
384✔
1363
};
1364
} // namespace
1365

1366
void StringIndex::find_all_fulltext(std::vector<ObjKey>& result, StringData value) const
1367
{
378✔
1368
    InternalFindResult res;
378✔
1369
    REALM_ASSERT(result.empty());
378✔
1370

189✔
1371
    auto tokenizer = Tokenizer::get_instance();
378✔
1372
    tokenizer->reset({value.data(), value.size()});
378✔
1373
    auto [includes, excludes] = tokenizer->get_search_tokens();
378✔
1374
    if (includes.empty()) {
378✔
1375
        if (excludes.empty()) {
24✔
1376
            throw InvalidArgument("Missing search token");
6✔
1377
        }
6✔
1378
        result = m_target_column.get_all_keys();
18✔
1379
    }
18✔
1380
    else {
354✔
1381
        for (auto& token : includes) {
432✔
1382
            if (token.back() == '*') {
432✔
1383
                std::set<int64_t> keys;
66✔
1384
                m_array->index_string_find_all_prefix(keys, StringData(token.data(), token.size() - 1));
66✔
1385
                intersect(result, keys);
66✔
1386
            }
66✔
1387
            else {
366✔
1388
                switch (find_all_no_copy(StringData{token}, res)) {
366✔
1389
                    case FindRes_not_found:
12✔
1390
                        result.clear();
12✔
1391
                        break;
12✔
1392
                    case FindRes_column: {
228✔
1393
                        IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
228✔
1394
                        FindResWrapper wrapper{res, indexes};
228✔
1395
                        intersect(result, wrapper);
228✔
1396
                        break;
228✔
1397
                    }
×
1398
                    case FindRes_single:
126✔
1399
                        // merge in single res
63✔
1400
                        if (result.empty()) {
126✔
1401
                            result.emplace_back(res.payload);
96✔
1402
                        }
96✔
1403
                        else {
30✔
1404
                            ObjKey key(res.payload);
30✔
1405
                            auto pos = std::lower_bound(result.begin(), result.end(), key);
30✔
1406
                            if (pos != result.end() && key == *pos) {
30✔
1407
                                result.clear();
30✔
1408
                                result.push_back(key);
30✔
1409
                            }
30✔
1410
                            else {
×
1411
                                result.clear();
×
1412
                            }
×
1413
                        }
30✔
1414
                        break;
126✔
1415
                }
432✔
1416
            }
432✔
1417
            if (result.empty())
432✔
1418
                return;
36✔
1419
        }
432✔
1420
    }
354✔
1421

189✔
1422
    for (auto& token : excludes) {
357✔
1423
        if (token.back() == '*') {
96✔
1424
            throw IllegalOperation("Exclude by prefix is not implemented");
×
1425
        }
×
1426
        if (result.empty())
96✔
1427
            return;
×
1428

48✔
1429
        switch (find_all_no_copy(StringData{token}, res)) {
96✔
1430
            case FindRes_not_found:
✔
1431
                // Nothing to exclude
1432
                break;
×
1433
            case FindRes_column: {
60✔
1434
                IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
60✔
1435

30✔
1436
                auto it = result.begin();
60✔
1437
                auto keep = it;
60✔
1438
                size_t m = res.start_ndx;
60✔
1439
                auto idx_val = indexes.get(m);
60✔
1440

30✔
1441
                while (it != result.end()) {
390✔
1442
                    if (it->value < idx_val) {
330✔
1443
                        // Not found in excludes
78✔
1444
                        if (keep < it)
156✔
1445
                            *keep = *it;
156✔
1446
                        ++keep;
156✔
1447
                        ++it;
156✔
1448
                    }
156✔
1449
                    else {
174✔
1450
                        if (it->value == idx_val) {
174✔
1451
                            // found in excludes - don't keep
75✔
1452
                            ++it;
150✔
1453
                        }
150✔
1454
                        ++m;
174✔
1455
                        idx_val = m < res.end_ndx ? indexes.get(m) : std::numeric_limits<int64_t>::max();
153✔
1456
                    }
174✔
1457
                }
330✔
1458
                if (keep != result.end()) {
60✔
1459
                    result.erase(keep, result.end());
60✔
1460
                }
60✔
1461
                break;
60✔
1462
            }
×
1463
            case FindRes_single: {
36✔
1464
                // exclude single res
18✔
1465
                ObjKey key(res.payload);
36✔
1466
                auto pos = std::lower_bound(result.begin(), result.end(), key);
36✔
1467
                if (pos != result.end() && key == *pos) {
36✔
1468
                    result.erase(pos);
36✔
1469
                }
36✔
1470
                break;
36✔
1471
            }
×
1472
        }
96✔
1473
    }
96✔
1474
}
336✔
1475

1476

1477
void StringIndex::clear()
1478
{
3,096✔
1479
    Array values(m_array->get_alloc());
3,096✔
1480
    get_child(*m_array, 0, values);
3,096✔
1481
    REALM_ASSERT(m_array->size() == values.size() + 1);
3,096✔
1482

894✔
1483
    values.clear();
3,096✔
1484
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
3,096✔
1485

894✔
1486
    size_t size = 1;
3,096✔
1487
    m_array->truncate_and_destroy_children(size); // Don't touch `values` array
3,096✔
1488

894✔
1489
    m_array->set_type(Array::type_HasRefs);
3,096✔
1490
}
3,096✔
1491

1492

1493
void StringIndex::do_delete(ObjKey obj_key, StringData index_data, size_t offset)
1494
{
1,072,515✔
1495
    Allocator& alloc = m_array->get_alloc();
1,072,515✔
1496
    Array values(alloc);
1,072,515✔
1497
    get_child(*m_array, 0, values);
1,072,515✔
1498
    REALM_ASSERT(m_array->size() == values.size() + 1);
1,072,515✔
1499

537,699✔
1500
    // Create 4 byte index key
537,699✔
1501
    key_type key = create_key(index_data, offset);
1,072,515✔
1502

537,699✔
1503
    const size_t pos = values.lower_bound_int(key);
1,072,515✔
1504
    const size_t pos_refs = pos + 1; // first entry in refs points to offsets
1,072,515✔
1505
    REALM_ASSERT(pos != values.size());
1,072,515✔
1506

537,699✔
1507
    if (m_array->is_inner_bptree_node()) {
1,072,515✔
1508
        ref_type ref = m_array->get_as_ref(pos_refs);
4,929✔
1509
        StringIndex node(ref, m_array.get(), pos_refs, m_target_column, alloc);
4,929✔
1510
        node.do_delete(obj_key, index_data, offset);
4,929✔
1511

2,541✔
1512
        // Update the ref
2,541✔
1513
        if (node.is_empty()) {
4,929✔
1514
            values.erase(pos);
×
1515
            m_array->erase(pos_refs);
×
1516
            node.destroy();
×
1517
        }
×
1518
        else {
4,929✔
1519
            key_type max_val = node.get_last_key();
4,929✔
1520
            if (max_val != key_type(values.get(pos)))
4,929✔
1521
                values.set(pos, max_val);
×
1522
        }
4,929✔
1523
    }
4,929✔
1524
    else {
1,067,586✔
1525
        uint64_t ref = m_array->get(pos_refs);
1,067,586✔
1526
        if (ref & 1) {
1,067,586✔
1527
            REALM_ASSERT(int64_t(ref >> 1) == obj_key.value);
481,488✔
1528
            values.erase(pos);
481,488✔
1529
            m_array->erase(pos_refs);
481,488✔
1530
        }
481,488✔
1531
        else {
586,098✔
1532
            // A real ref either points to a list or a subindex
296,262✔
1533
            char* header = alloc.translate(ref_type(ref));
586,098✔
1534
            if (Array::get_context_flag_from_header(header)) {
586,098✔
1535
                StringIndex subindex(ref_type(ref), m_array.get(), pos_refs, m_target_column, alloc);
237,384✔
1536
                subindex.do_delete(obj_key, index_data, offset + s_index_key_length);
237,384✔
1537

120,180✔
1538
                if (subindex.is_empty()) {
237,384✔
1539
                    values.erase(pos);
11,514✔
1540
                    m_array->erase(pos_refs);
11,514✔
1541
                    subindex.destroy();
11,514✔
1542
                }
11,514✔
1543
            }
237,384✔
1544
            else {
348,714✔
1545
                IntegerColumn sub(alloc, ref_type(ref)); // Throws
348,714✔
1546
                sub.set_parent(m_array.get(), pos_refs);
348,714✔
1547
                size_t r = sub.find_first(obj_key.value);
348,714✔
1548
                size_t sub_size = sub.size(); // Slow
348,714✔
1549
                REALM_ASSERT_EX(r != sub_size, r, sub_size);
348,714✔
1550
                sub.erase(r);
348,714✔
1551

176,082✔
1552
                if (sub_size == 1) {
348,714✔
1553
                    values.erase(pos);
2,952✔
1554
                    m_array->erase(pos_refs);
2,952✔
1555
                    sub.destroy();
2,952✔
1556
                }
2,952✔
1557
            }
348,714✔
1558
        }
586,098✔
1559
    }
1,067,586✔
1560
}
1,072,515✔
1561

1562
void StringIndex::erase_string(ObjKey key, StringData value)
1563
{
830,247✔
1564
    do_delete(key, value, 0);
830,247✔
1565

415,011✔
1566
    // Collapse top nodes with single item
415,011✔
1567
    while (m_array->is_inner_bptree_node()) {
830,247✔
1568
        REALM_ASSERT(m_array->size() > 1); // node cannot be empty
4,929✔
1569
        if (m_array->size() > 2)
4,929✔
1570
            break;
4,929✔
1571

1572
        ref_type ref = m_array->get_as_ref(1);
×
1573
        m_array->set(1, 1); // avoid destruction of the extracted ref
×
1574
        m_array->destroy_deep();
×
1575
        m_array->init_from_ref(ref);
×
1576
        m_array->update_parent();
×
1577
    }
×
1578
}
830,247✔
1579

1580
namespace {
1581

1582
bool has_duplicate_values(const Array& node, const ClusterColumn& target_col) noexcept
1583
{
6,348✔
1584
    Allocator& alloc = node.get_alloc();
6,348✔
1585
    Array child(alloc);
6,348✔
1586
    size_t n = node.size();
6,348✔
1587
    REALM_ASSERT(n >= 1);
6,348✔
1588
    if (node.is_inner_bptree_node()) {
6,348✔
1589
        // Inner node
1590
        for (size_t i = 1; i < n; ++i) {
×
1591
            ref_type ref = node.get_as_ref(i);
×
1592
            child.init_from_ref(ref);
×
1593
            if (has_duplicate_values(child, target_col))
×
1594
                return true;
×
1595
        }
×
1596
        return false;
×
1597
    }
6,348✔
1598

3,174✔
1599
    // Leaf node
3,174✔
1600
    for (size_t i = 1; i < n; ++i) {
32,832✔
1601
        int_fast64_t value = node.get(i);
28,740✔
1602
        bool is_single_row_index = (value & 1) != 0;
28,740✔
1603
        if (is_single_row_index)
28,740✔
1604
            continue;
23,280✔
1605

2,730✔
1606
        ref_type ref = to_ref(value);
5,460✔
1607
        child.init_from_ref(ref);
5,460✔
1608

2,730✔
1609
        bool is_subindex = child.get_context_flag();
5,460✔
1610
        if (is_subindex) {
5,460✔
1611
            if (has_duplicate_values(child, target_col))
4,872✔
1612
                return true;
1,800✔
1613
            continue;
3,072✔
1614
        }
3,072✔
1615

294✔
1616
        // Child is root of B+-tree of row indexes
294✔
1617
        IntegerColumn sub(alloc, ref);
588✔
1618
        if (sub.size() > 1) {
588✔
1619
            ObjKey first_key = ObjKey(sub.get(0));
480✔
1620
            ObjKey last_key = ObjKey(sub.back());
480✔
1621
            Mixed first = target_col.get_value(first_key);
480✔
1622
            Mixed last = target_col.get_value(last_key);
480✔
1623
            // Since the list is kept in sorted order, the first and
240✔
1624
            // last values will be the same only if the whole list is
240✔
1625
            // storing duplicate values.
240✔
1626
            if (first == last) {
480✔
1627
                return true;
432✔
1628
            }
432✔
1629
            // There may also be several short lists combined, so we need to
24✔
1630
            // check each of these individually for duplicates.
24✔
1631
            IntegerColumn::const_iterator it = sub.cbegin();
48✔
1632
            IntegerColumn::const_iterator it_end = sub.cend();
48✔
1633
            SortedListComparator slc(target_col);
48✔
1634
            while (it != it_end) {
144✔
1635
                Mixed it_data = target_col.get_value(ObjKey(*it));
120✔
1636
                IntegerColumn::const_iterator next = std::upper_bound(it, it_end, it_data, slc);
120✔
1637
                size_t count_of_value = next - it; // row index subtraction in `sub`
120✔
1638
                if (count_of_value > 1) {
120✔
1639
                    return true;
24✔
1640
                }
24✔
1641
                it = next;
96✔
1642
            }
96✔
1643
        }
48✔
1644
    }
588✔
1645

3,174✔
1646
    return false;
5,220✔
1647
}
6,348✔
1648

1649
} // anonymous namespace
1650

1651

1652
bool StringIndex::has_duplicate_values() const noexcept
1653
{
1,476✔
1654
    return ::has_duplicate_values(*m_array, m_target_column);
1,476✔
1655
}
1,476✔
1656

1657

1658
bool StringIndex::is_empty() const
1659
{
242,472✔
1660
    return m_array->size() == 1; // first entry in refs points to offsets
242,472✔
1661
}
242,472✔
1662

1663
template <>
1664
void StringIndex::insert<StringData>(ObjKey key, StringData value)
1665
{
560,697✔
1666
    StringConversionBuffer buffer;
560,697✔
1667

280,392✔
1668
    if (this->m_target_column.is_fulltext()) {
560,697✔
1669
        auto words = Tokenizer::get_instance()->reset(std::string_view(value)).get_all_tokens();
198✔
1670

99✔
1671
        for (auto& word : words) {
936✔
1672
            Mixed m(word);
936✔
1673
            insert_with_offset(key, m.get_index_data(buffer), m, 0); // Throws
936✔
1674
        }
936✔
1675
    }
198✔
1676
    else {
560,499✔
1677
        Mixed m(value);
560,499✔
1678
        insert_with_offset(key, m.get_index_data(buffer), m, 0); // Throws
560,499✔
1679
    }
560,499✔
1680
}
560,697✔
1681

1682
template <>
1683
void StringIndex::set<StringData>(ObjKey key, StringData new_value)
1684
{
439,116✔
1685
    StringConversionBuffer buffer;
439,116✔
1686
    Mixed old_value = get(key);
439,116✔
1687
    Mixed new_value2 = Mixed(new_value);
439,116✔
1688

219,423✔
1689
    if (this->m_target_column.is_fulltext()) {
439,116✔
1690
        auto tokenizer = Tokenizer::get_instance();
168✔
1691
        StringData old_string = old_value.get_index_data(buffer);
168✔
1692
        std::set<std::string> old_words;
168✔
1693

84✔
1694
        if (old_string.size() > 0) {
168✔
1695
            tokenizer->reset({old_string.data(), old_string.size()});
6✔
1696
            old_words = tokenizer->get_all_tokens();
6✔
1697
        }
6✔
1698

84✔
1699
        tokenizer->reset({new_value.data(), new_value.size()});
168✔
1700
        auto new_words = tokenizer->get_all_tokens();
168✔
1701

84✔
1702
        auto w1 = old_words.begin();
168✔
1703
        auto w2 = new_words.begin();
168✔
1704

84✔
1705
        // Do a diff, deleting words no longer present and
84✔
1706
        // inserting new words
84✔
1707
        while (w1 != old_words.end() && w2 != new_words.end()) {
540✔
1708
            if (*w1 < *w2) {
372✔
1709
                erase_string(key, *w1);
126✔
1710
                ++w1;
126✔
1711
            }
126✔
1712
            else if (*w2 < *w1) {
246✔
1713
                Mixed m(*w2);
198✔
1714
                insert_with_offset(key, m.get_index_data(buffer), m, 0);
198✔
1715
                ++w2;
198✔
1716
            }
198✔
1717
            else {
48✔
1718
                ++w1;
48✔
1719
                ++w2;
48✔
1720
            }
48✔
1721
        }
372✔
1722
        while (w1 != old_words.end()) {
168✔
1723
            erase_string(key, *w1);
×
1724
            ++w1;
×
1725
        }
×
1726
        while (w2 != new_words.end()) {
3,354✔
1727
            Mixed m(*w2);
3,186✔
1728
            insert_with_offset(key, m.get_index_data(buffer), m, 0);
3,186✔
1729

1,593✔
1730
            ++w2;
3,186✔
1731
        }
3,186✔
1732
    }
168✔
1733
    else {
438,948✔
1734
        if (REALM_LIKELY(new_value2 != old_value)) {
438,948✔
1735
            // We must erase this row first because erase uses find_first which
216,231✔
1736
            // might find the duplicate if we insert before erasing.
216,231✔
1737
            erase(key); // Throws
432,669✔
1738

216,231✔
1739
            auto index_data = new_value2.get_index_data(buffer);
432,669✔
1740
            insert_with_offset(key, index_data, new_value2, 0); // Throws
432,669✔
1741
        }
432,669✔
1742
    }
438,948✔
1743
}
439,116✔
1744

1745
void StringIndex::node_add_key(ref_type ref)
1746
{
138✔
1747
    REALM_ASSERT(ref);
138✔
1748
    REALM_ASSERT(m_array->is_inner_bptree_node());
138✔
1749

60✔
1750
    Allocator& alloc = m_array->get_alloc();
138✔
1751
    Array offsets(alloc);
138✔
1752
    get_child(*m_array, 0, offsets);
138✔
1753
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
138✔
1754
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE + 1);
138✔
1755

60✔
1756
    Array new_top(alloc);
138✔
1757
    Array new_offsets(alloc);
138✔
1758
    new_top.init_from_ref(ref);
138✔
1759
    new_offsets.init_from_ref(new_top.get_as_ref(0));
138✔
1760
    REALM_ASSERT(!new_offsets.is_empty());
138✔
1761

60✔
1762
    int64_t key = new_offsets.back();
138✔
1763
    offsets.add(key);
138✔
1764
    m_array->add(ref);
138✔
1765
}
138✔
1766

1767
// Must return true if value of object(key) is less than needle.
1768
bool SortedListComparator::operator()(int64_t key_value, Mixed needle) // used in lower_bound
1769
{
20,360,118✔
1770
    Mixed a = m_column.get_value(ObjKey(key_value));
20,360,118✔
1771
    if (a.is_null() && !needle.is_null())
20,360,118✔
1772
        return true;
618✔
1773
    else if (needle.is_null() && !a.is_null())
20,359,500✔
UNCOV
1774
        return false;
×
1775
    else if (a.is_null() && needle.is_null())
20,359,500✔
1776
        return false;
357,138✔
1777

10,028,979✔
1778
    if (a == needle)
20,002,362✔
1779
        return false;
20,035,512✔
1780

10,794✔
1781
    // The Mixed::operator< uses a lexicograpical comparison, therefore we
10,794✔
1782
    // cannot use our utf8_compare here because we need to be consistent with
10,794✔
1783
    // using the same compare method as how these strings were they were put
10,794✔
1784
    // into this ordered column in the first place.
10,794✔
1785
    return a.compare_signed(needle) < 0;
2,147,494,441✔
1786
}
2,147,494,441✔
1787

1788

1789
// Must return true if value of needle is less than value of object(key).
1790
bool SortedListComparator::operator()(Mixed needle, int64_t key_value) // used in upper_bound
1791
{
18,259,932✔
1792
    Mixed a = m_column.get_value(ObjKey(key_value));
18,259,932✔
1793
    if (needle == a) {
18,305,922✔
1794
        return false;
18,305,922✔
1795
    }
18,305,922✔
1796
    return !(*this)(key_value, needle);
4,294,967,294✔
1797
}
4,294,967,294✔
1798

1799
// LCOV_EXCL_START ignore debug functions
1800

1801

1802
void StringIndex::verify() const
1803
{
1,320✔
1804
#ifdef REALM_DEBUG
1,320✔
1805
    m_array->verify();
1,320✔
1806

660✔
1807
    Allocator& alloc = m_array->get_alloc();
1,320✔
1808
    const size_t array_size = m_array->size();
1,320✔
1809

660✔
1810
    // Get first matching row for every key
660✔
1811
    if (m_array->is_inner_bptree_node()) {
1,320✔
1812
        for (size_t i = 1; i < array_size; ++i) {
×
1813
            size_t ref = m_array->get_as_ref(i);
×
1814
            StringIndex ndx(ref, nullptr, 0, m_target_column, alloc);
×
1815
            ndx.verify();
×
1816
        }
×
1817
    }
×
1818
    else {
1,320✔
1819
        size_t column_size = m_target_column.size();
1,320✔
1820
        for (size_t i = 1; i < array_size; ++i) {
2,736✔
1821
            int64_t ref = m_array->get(i);
1,416✔
1822

708✔
1823
            // low bit set indicate literal ref (shifted)
708✔
1824
            if (ref & 1) {
1,416✔
1825
                size_t r = to_size_t((uint64_t(ref) >> 1));
96✔
1826
                REALM_ASSERT_EX(r < column_size, r, column_size);
96✔
1827
            }
96✔
1828
            else {
1,320✔
1829
                // A real ref either points to a list or a subindex
660✔
1830
                char* header = alloc.translate(to_ref(ref));
1,320✔
1831
                if (Array::get_context_flag_from_header(header)) {
1,320✔
1832
                    StringIndex ndx(to_ref(ref), m_array.get(), i, m_target_column, alloc);
1,296✔
1833
                    ndx.verify();
1,296✔
1834
                }
1,296✔
1835
                else {
24✔
1836
                    IntegerColumn sub(alloc, to_ref(ref)); // Throws
24✔
1837
                    IntegerColumn::const_iterator it = sub.cbegin();
24✔
1838
                    IntegerColumn::const_iterator it_end = sub.cend();
24✔
1839
                    SortedListComparator slc(m_target_column);
24✔
1840
                    Mixed previous = get(ObjKey(*it));
24✔
1841
                    size_t last_row = to_size_t(*it);
24✔
1842

12✔
1843
                    // Check that strings listed in sub are in sorted order
12✔
1844
                    // and if there are duplicates, that the row numbers are
12✔
1845
                    // sorted in the group of duplicates.
12✔
1846
                    while (it != it_end) {
96✔
1847
                        Mixed it_data = get(ObjKey(*it));
72✔
1848
                        size_t it_row = to_size_t(*it);
72✔
1849
                        REALM_ASSERT(previous.compare_signed(it_data) <= 0);
72✔
1850
                        if (it != sub.cbegin() && previous == it_data) {
72✔
1851
                            REALM_ASSERT_EX(it_row > last_row, it_row, last_row);
×
1852
                        }
×
1853
                        last_row = it_row;
72✔
1854
                        previous = get(ObjKey(*it));
72✔
1855
                        ++it;
72✔
1856
                    }
72✔
1857
                }
24✔
1858
            }
1,320✔
1859
        }
1,416✔
1860
    }
1,320✔
1861
// FIXME: Extend verification along the lines of IntegerColumn::verify().
660✔
1862
#endif
1,320✔
1863
}
1,320✔
1864

1865
#ifdef REALM_DEBUG
1866

1867
template <class T>
1868
void StringIndex::verify_entries(const ClusterColumn& column) const
1869
{
1870
    std::vector<ObjKey> results;
1871

1872
    auto it = column.begin();
1873
    auto end = column.end();
1874
    auto col = column.get_column_key();
1875
    while (it != end) {
1876
        ObjKey key = it->get_key();
1877
        T value = it->get<T>(col);
1878

1879
        find_all(results, value);
1880

1881
        auto ndx = find(results.begin(), results.end(), key);
1882
        REALM_ASSERT(ndx != results.end());
1883
        size_t found = count(value);
1884
        REALM_ASSERT_EX(found >= 1, found);
1885
        results.clear();
1886
    }
1887
}
1888

1889
namespace {
1890

1891
void out_hex(std::ostream& out, uint64_t val)
1892
{
×
1893
    if (val) {
×
1894
        out_hex(out, val >> 8);
×
1895
        if (char c = val & 0xFF) {
×
1896
            out << c;
×
1897
        }
×
1898
    }
×
1899
}
×
1900

1901
} // namespace
1902

1903
void StringIndex::dump_node_structure(const Array& node, std::ostream& out, int level)
1904
{
×
1905
    int indent = level * 2;
×
1906
    Allocator& alloc = node.get_alloc();
×
1907
    Array subnode(alloc);
×
1908

1909
    size_t node_size = node.size();
×
1910
    REALM_ASSERT(node_size >= 1);
×
1911

1912
    out << std::hex;
×
1913

1914
    bool node_is_leaf = !node.is_inner_bptree_node();
×
1915
    if (node_is_leaf) {
×
1916
        out << std::setw(indent) << ""
×
1917
            << "Leaf (B+ tree) (ref: " << node.get_ref() << ")\n";
×
1918
    }
×
1919
    else {
×
1920
        out << std::setw(indent) << ""
×
1921
            << "Inner node (B+ tree) (ref: " << node.get_ref() << ")\n";
×
1922
    }
×
1923

1924
    subnode.init_from_ref(to_ref(node.front()));
×
1925
    out << std::setw(indent) << ""
×
1926
        << "  Keys (keys_ref: " << subnode.get_ref() << ", ";
×
1927
    if (subnode.is_empty()) {
×
1928
        out << "no keys";
×
1929
    }
×
1930
    else {
×
1931
        out << "keys: ";
×
1932
        for (size_t i = 0; i != subnode.size(); ++i) {
×
1933
            if (i != 0)
×
1934
                out << ", ";
×
1935
            out_hex(out, subnode.get(i));
×
1936
        }
×
1937
    }
×
1938
    out << ")\n";
×
1939

1940
    if (node_is_leaf) {
×
1941
        for (size_t i = 1; i != node_size; ++i) {
×
1942
            int_fast64_t value = node.get(i);
×
1943
            bool is_single_row_index = (value & 1) != 0;
×
1944
            if (is_single_row_index) {
×
1945
                out << std::setw(indent) << ""
×
1946
                    << "  Single row index (value: " << (value / 2) << ")\n";
×
1947
                continue;
×
1948
            }
×
1949
            subnode.init_from_ref(to_ref(value));
×
1950
            bool is_subindex = subnode.get_context_flag();
×
1951
            if (is_subindex) {
×
1952
                out << std::setw(indent) << ""
×
1953
                    << "  Subindex\n";
×
1954
                dump_node_structure(subnode, out, level + 2);
×
1955
                continue;
×
1956
            }
×
1957
            IntegerColumn indexes(alloc, to_ref(value));
×
1958
            out << std::setw(indent) << ""
×
1959
                << "  List of row indexes\n";
×
1960
            indexes.dump_values(out, level + 2);
×
1961
        }
×
1962
        return;
×
1963
    }
×
1964

1965

1966
    size_t num_children = node_size - 1;
×
1967
    size_t child_ref_begin = 1;
×
1968
    size_t child_ref_end = 1 + num_children;
×
1969
    for (size_t i = child_ref_begin; i != child_ref_end; ++i) {
×
1970
        subnode.init_from_ref(node.get_as_ref(i));
×
1971
        dump_node_structure(subnode, out, level + 1);
×
1972
    }
×
1973
}
×
1974

1975

1976
void StringIndex::do_dump_node_structure(std::ostream& out, int level) const
1977
{
×
1978
    dump_node_structure(*m_array, out, level);
×
1979
}
×
1980

1981
#endif // LCOV_EXCL_STOP ignore debug functions
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc