• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / 1817

04 Nov 2023 12:29AM UTC coverage: 91.695% (+0.04%) from 91.66%
1817

push

Evergreen

web-flow
Use a single write transaction for DiscardLocal client resets on FLX realms (#7110)

Updating the subscription store in a separate write transaction from the
recovery means that we temporarily commit an invalid state. If the application
crashes between committing the client reset diff and updating the subscription
store, the next launch of the application would try to use the now-invalid
pending subscriptions that should have been discarded.

92122 of 168844 branches covered (0.0%)

141 of 146 new or added lines in 7 files covered. (96.58%)

59 existing lines in 12 files now uncovered.

230819 of 251726 relevant lines covered (91.69%)

6481779.32 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.87
/src/realm/index_string.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include <cstdio>
20
#include <iomanip>
21
#include <list>
22

23
#ifdef REALM_DEBUG
24
#include <iostream>
25
#endif
26

27
#include <realm/exceptions.hpp>
28
#include <realm/index_string.hpp>
29
#include <realm/table.hpp>
30
#include <realm/timestamp.hpp>
31
#include <realm/column_integer.hpp>
32
#include <realm/unicode.hpp>
33
#include <realm/tokenizer.hpp>
34

35
using namespace realm;
36
using namespace realm::util;
37

38
namespace {
39

40
void get_child(Array& parent, size_t child_ref_ndx, Array& child) noexcept
41
{
19,779,123✔
42
    ref_type child_ref = parent.get_as_ref(child_ref_ndx);
19,779,123✔
43
    child.init_from_ref(child_ref);
19,779,123✔
44
    child.set_parent(&parent, child_ref_ndx);
19,779,123✔
45
}
19,779,123✔
46

47
// This method reconstructs the string inserted in the search index based on a string
48
// that matches so far and the last key (only works if complete strings are stored in the index)
49
static StringData reconstruct_string(size_t offset, StringIndex::key_type key, StringData new_string)
50
{
1,086✔
51
    if (key == 0)
1,086✔
52
        return StringData();
×
53

543✔
54
    size_t rest_len = 4;
1,086✔
55
    char* k = reinterpret_cast<char*>(&key);
1,086✔
56
    if (k[0] == 'X')
1,086✔
57
        rest_len = 3;
234✔
58
    else if (k[1] == 'X')
852✔
59
        rest_len = 2;
360✔
60
    else if (k[2] == 'X')
492✔
61
        rest_len = 1;
228✔
62
    else if (k[3] == 'X')
264✔
63
        rest_len = 0;
36✔
64

543✔
65
    REALM_ASSERT(offset + rest_len <= new_string.size());
1,086✔
66

543✔
67
    return StringData(new_string.data(), offset + rest_len);
1,086✔
68
}
1,086✔
69

70

71
} // anonymous namespace
72

73
DataType ClusterColumn::get_data_type() const
74
{
×
75
    const Table* table = m_cluster_tree->get_owning_table();
×
76
    return table->get_column_type(m_column_key);
×
77
}
×
78

79
Mixed ClusterColumn::get_value(ObjKey key) const
80
{
43,327,266✔
81
    const Obj obj{m_cluster_tree->get(key)};
43,327,266✔
82
    return obj.get_any(m_column_key);
43,327,266✔
83
}
43,327,266✔
84

85
std::vector<ObjKey> ClusterColumn::get_all_keys() const
86
{
18✔
87
    std::vector<ObjKey> ret;
18✔
88
    ret.reserve(m_cluster_tree->size());
18✔
89
    m_cluster_tree->traverse([&ret](const Cluster* cluster) {
18✔
90
        auto sz = cluster->node_size();
18✔
91
        for (size_t i = 0; i < sz; i++) {
198✔
92
            ret.push_back(cluster->get_real_key(i));
180✔
93
        }
180✔
94

9✔
95
        return IteratorControl::AdvanceToNext;
18✔
96
    });
18✔
97
    return ret;
18✔
98
}
18✔
99

100
template <>
101
int64_t IndexArray::from_list<index_FindFirst>(Mixed value, InternalFindResult& /* result_ref */,
102
                                               const IntegerColumn& key_values, const ClusterColumn& column) const
103
{
5,244✔
104
    SortedListComparator slc(column);
5,244✔
105

2,598✔
106
    IntegerColumn::const_iterator it_end = key_values.cend();
5,244✔
107
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
5,244✔
108
    if (lower == it_end)
5,244✔
109
        return null_key.value;
×
110

2,598✔
111
    int64_t first_key_value = *lower;
5,244✔
112

2,598✔
113
    Mixed actual = column.get_value(ObjKey(first_key_value));
5,244✔
114
    if (actual != value)
5,244✔
115
        return null_key.value;
×
116

2,598✔
117
    return first_key_value;
5,244✔
118
}
5,244✔
119

120
template <>
121
int64_t IndexArray::from_list<index_Count>(Mixed value, InternalFindResult& /* result_ref */,
122
                                           const IntegerColumn& key_values, const ClusterColumn& column) const
123
{
8,277✔
124
    SortedListComparator slc(column);
8,277✔
125

4,167✔
126
    IntegerColumn::const_iterator it_end = key_values.cend();
8,277✔
127
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
8,277✔
128
    if (lower == it_end)
8,277✔
129
        return 0;
24✔
130

4,155✔
131
    int64_t first_key_value = *lower;
8,253✔
132

4,155✔
133
    Mixed actual = column.get_value(ObjKey(first_key_value));
8,253✔
134
    if (actual != value)
8,253✔
135
        return 0;
24✔
136

4,143✔
137
    IntegerColumn::const_iterator upper = std::upper_bound(lower, it_end, value, slc);
8,229✔
138
    int64_t cnt = upper - lower;
8,229✔
139

4,143✔
140
    return cnt;
8,229✔
141
}
8,229✔
142

143
template <>
144
int64_t IndexArray::from_list<index_FindAll_nocopy>(Mixed value, InternalFindResult& result_ref,
145
                                                    const IntegerColumn& key_values,
146
                                                    const ClusterColumn& column) const
147
{
2,169✔
148
    SortedListComparator slc(column);
2,169✔
149
    IntegerColumn::const_iterator it_end = key_values.cend();
2,169✔
150
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
2,169✔
151
    if (lower == it_end)
2,169✔
152
        return size_t(FindRes_not_found);
48✔
153

1,068✔
154
    ObjKey first_key = ObjKey(*lower);
2,121✔
155

1,068✔
156
    Mixed actual = column.get_value(ObjKey(first_key));
2,121✔
157
    if (actual != value)
2,121✔
158
        return size_t(FindRes_not_found);
63✔
159

1,035✔
160
    // Optimization: check the last entry before trying upper bound.
1,035✔
161
    IntegerColumn::const_iterator upper = it_end;
2,058✔
162
    --upper;
2,058✔
163
    // Single result if upper matches lower
1,035✔
164
    if (upper == lower) {
2,058✔
165
        result_ref.payload = *lower;
318✔
166
        return size_t(FindRes_single);
318✔
167
    }
318✔
168

879✔
169
    // Check string value at upper, if equal return matches in (lower, upper]
879✔
170
    ObjKey last_key = ObjKey(*upper);
1,740✔
171
    actual = column.get_value(ObjKey(last_key));
1,740✔
172
    if (actual == value) {
1,740✔
173
        result_ref.payload = from_ref(key_values.get_ref());
1,209✔
174
        result_ref.start_ndx = lower.get_position();
1,209✔
175
        result_ref.end_ndx = upper.get_position() + 1; // one past last match
1,209✔
176
        return size_t(FindRes_column);
1,209✔
177
    }
1,209✔
178

273✔
179
    // Last result is not equal, find the upper bound of the range of results.
273✔
180
    // Note that we are passing upper which is cend() - 1 here as we already
273✔
181
    // checked the last item manually.
273✔
182
    upper = std::upper_bound(lower, upper, value, slc);
531✔
183

273✔
184
    result_ref.payload = from_ref(key_values.get_ref());
531✔
185
    result_ref.start_ndx = lower.get_position();
531✔
186
    result_ref.end_ndx = upper.get_position();
531✔
187
    return size_t(FindRes_column);
531✔
188
}
531✔
189

190

191
template <IndexMethod method>
192
int64_t IndexArray::index_string(Mixed value, InternalFindResult& result_ref, const ClusterColumn& column) const
193
{
2,260,623✔
194
    // Return`realm::not_found`, or an index to the (any) match
1,121,034✔
195
    constexpr bool first(method == index_FindFirst);
2,260,623✔
196
    // Return 0, or the number of items that match the specified `value`
1,121,034✔
197
    constexpr bool get_count(method == index_Count);
2,260,623✔
198
    // Same as `index_FindAll` but does not copy matching rows into `column`
1,121,034✔
199
    // returns FindRes_not_found if there are no matches
1,121,034✔
200
    // returns FindRes_single and the row index (literal) in result_ref.payload
1,121,034✔
201
    // or returns FindRes_column and the reference to a column of duplicates in
1,121,034✔
202
    // result_ref.result with the results in the bounds start_ndx, and end_ndx
1,121,034✔
203
    constexpr bool allnocopy(method == index_FindAll_nocopy);
2,260,623✔
204

1,121,034✔
205
    constexpr int64_t local_not_found = allnocopy ? int64_t(FindRes_not_found) : first ? null_key.value : 0;
2,260,623✔
206

1,121,034✔
207
    const char* data = m_data;
2,260,623✔
208
    const char* header;
2,260,623✔
209
    uint_least8_t width = m_width;
2,260,623✔
210
    bool is_inner_node = m_is_inner_bptree_node;
2,260,623✔
211
    typedef StringIndex::key_type key_type;
2,260,623✔
212
    size_t stringoffset = 0;
2,260,623✔
213

1,121,034✔
214
    StringConversionBuffer buffer;
2,260,623✔
215
    StringData index_data = value.get_index_data(buffer);
2,260,623✔
216

1,121,034✔
217
    // Create 4 byte index key
1,121,034✔
218
    key_type key = StringIndex::create_key(index_data, stringoffset);
2,260,623✔
219

1,121,034✔
220
    for (;;) {
2,756,112✔
221
        // Get subnode table
1,362,600✔
222
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
2,756,112✔
223

1,362,600✔
224
        // Find the position matching the key
1,362,600✔
225
        const char* offsets_header = m_alloc.translate(offsets_ref);
2,756,112✔
226
        const char* offsets_data = get_data_from_header(offsets_header);
2,756,112✔
227
        size_t offsets_size = get_size_from_header(offsets_header);
2,756,112✔
228
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
2,756,112✔
229

1,362,600✔
230
        // If key is outside range, we know there can be no match
1,362,600✔
231
        if (pos == offsets_size)
2,756,112✔
232
            return local_not_found;
905,049✔
233

912,450✔
234
        // Get entry under key
912,450✔
235
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
1,851,063✔
236
        uint64_t ref = get_direct(data, width, pos_refs);
1,851,063✔
237

912,450✔
238
        if (is_inner_node) {
1,851,063✔
239
            // Set vars for next iteration
26,646✔
240
            header = m_alloc.translate(to_ref(ref));
62,253✔
241
            data = get_data_from_header(header);
62,253✔
242
            width = get_width_from_header(header);
62,253✔
243
            is_inner_node = get_is_inner_bptree_node_from_header(header);
62,253✔
244
            continue;
62,253✔
245
        }
62,253✔
246

885,804✔
247
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
1,788,810✔
248

885,804✔
249
        if (stored_key != key)
1,788,810✔
250
            return local_not_found;
278,628✔
251

755,448✔
252
        // Literal row index (tagged)
755,448✔
253
        if (ref & 1) {
1,510,182✔
254
            int64_t key_value = int64_t(ref >> 1);
1,061,016✔
255

532,527✔
256
            Mixed a = column.is_fulltext() ? reconstruct_string(stringoffset, key, index_data)
532,611✔
257
                                           : column.get_value(ObjKey(key_value));
1,060,932✔
258
            if (a == value) {
1,061,016✔
259
                result_ref.payload = key_value;
1,058,445✔
260
                return first ? key_value : get_count ? 1 : FindRes_single;
1,051,149✔
261
            }
1,058,445✔
262
            return local_not_found;
2,571✔
263
        }
2,571✔
264

222,921✔
265
        const char* sub_header = m_alloc.translate(ref_type(ref));
449,166✔
266
        const bool sub_isindex = get_context_flag_from_header(sub_header);
449,166✔
267

222,921✔
268
        // List of row indices with common prefix up to this point, in sorted order.
222,921✔
269
        if (!sub_isindex) {
449,166✔
270
            const IntegerColumn sub(m_alloc, ref_type(ref));
15,978✔
271
            if (column.is_fulltext()) {
15,978✔
272
                result_ref.payload = ref;
288✔
273
                result_ref.start_ndx = 0;
288✔
274
                result_ref.end_ndx = sub.size();
288✔
275
                return FindRes_column;
288✔
276
            }
288✔
277

7,857✔
278
            return from_list<method>(value, result_ref, sub, column);
15,690✔
279
        }
15,690✔
280

214,920✔
281
        // Recurse into sub-index;
214,920✔
282
        header = sub_header;
433,188✔
283
        data = get_data_from_header(header);
433,188✔
284
        width = get_width_from_header(header);
433,188✔
285
        is_inner_node = get_is_inner_bptree_node_from_header(header);
433,188✔
286

214,920✔
287
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
214,920✔
288
        stringoffset += 4;
433,188✔
289

214,920✔
290
        // Update 4 byte index key
214,920✔
291
        key = StringIndex::create_key(index_data, stringoffset);
433,188✔
292
    }
433,188✔
293
}
2,260,623✔
294

295

296
void IndexArray::from_list_all_ins(StringData upper_value, std::vector<ObjKey>& result, const IntegerColumn& rows,
297
                                   const ClusterColumn& column) const
298
{
1,422✔
299
    // optimization for the most common case, where all the strings under a given subindex are equal
711✔
300
    Mixed first = column.get_value(ObjKey(*rows.cbegin()));
1,422✔
301
    Mixed last = column.get_value(ObjKey(*(rows.cend() - 1)));
1,422✔
302
    if (first == last) {
1,422✔
303
        if (!first.is_type(type_String))
102✔
304
            return;
×
305
        auto first_str_upper = case_map(first.get_string(), true);
102✔
306
        if (first_str_upper != upper_value) {
102✔
307
            return;
24✔
308
        }
24✔
309

39✔
310
        size_t sz = result.size() + rows.size();
78✔
311
        result.reserve(sz);
78✔
312
        for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
246✔
313
            result.push_back(ObjKey(*it));
168✔
314
        }
168✔
315
        return;
78✔
316
    }
78✔
317

660✔
318
    // special case for very long strings, where they might have a common prefix and end up in the
660✔
319
    // same subindex column, but still not be identical
660✔
320
    for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
66,792✔
321
        ObjKey key = ObjKey(*it);
65,472✔
322
        Mixed val = column.get_value(key);
65,472✔
323
        if (val.is_type(type_String)) {
65,472✔
324
            auto upper_str = case_map(val.get_string(), true);
65,472✔
325
            if (upper_str == upper_value) {
65,472✔
326
                result.push_back(key);
1,440✔
327
            }
1,440✔
328
        }
65,472✔
329
    }
65,472✔
330

660✔
331
    return;
1,320✔
332
}
1,320✔
333

334

335
void IndexArray::from_list_all(Mixed value, std::vector<ObjKey>& result, const IntegerColumn& rows,
336
                               const ClusterColumn& column) const
337
{
41,361✔
338
    SortedListComparator slc(column);
41,361✔
339

20,226✔
340
    IntegerColumn::const_iterator it_end = rows.cend();
41,361✔
341
    IntegerColumn::const_iterator lower = std::lower_bound(rows.cbegin(), it_end, value, slc);
41,361✔
342
    if (lower == it_end)
41,361✔
343
        return;
×
344

20,226✔
345
    ObjKey key = ObjKey(*lower);
41,361✔
346

20,226✔
347
    Mixed a = column.get_value(key);
41,361✔
348
    if (a != value)
41,361✔
349
        return;
×
350

20,226✔
351
    IntegerColumn::const_iterator upper = std::upper_bound(lower, it_end, value, slc);
41,361✔
352

20,226✔
353
    // Copy all matches into result column
20,226✔
354
    size_t sz = result.size() + (upper - lower);
41,361✔
355
    result.reserve(sz);
41,361✔
356
    for (IntegerColumn::const_iterator it = lower; it != upper; ++it) {
726,390✔
357
        result.push_back(ObjKey(*it));
685,029✔
358
    }
685,029✔
359

20,226✔
360
    return;
41,361✔
361
}
41,361✔
362

363

364
namespace {
365

366
// Helper functions for SearchList (index_string_all_ins) for generating permutations of index keys
367

368
// replicates the 4 least significant bits each times 8
369
// eg: abcd -> aaaaaaaabbbbbbbbccccccccdddddddd
370
uint32_t replicate_4_lsb_x8(uint32_t i)
371
{
3,122,895✔
372
    REALM_ASSERT_DEBUG(i <= 15);
3,122,895✔
373
    i *= 0x204081;
3,122,895✔
374
    i &= 0x1010101;
3,122,895✔
375
    i *= 0xff;
3,122,895✔
376
    return i;
3,122,895✔
377
}
3,122,895✔
378

379
int32_t select_from_mask(int32_t a, int32_t b, int32_t mask) {
3,122,895✔
380
    return a ^ ((a ^ b) & mask);
3,122,895✔
381
}
3,122,895✔
382

383
// Given upper and lower keys: "ABCD" and "abcd", the 4 LSBs in the permutation argument determine the
384
// final key:
385
// Permutation 0  = "ABCD"
386
// Permutation 1  = "ABCd"
387
// Permutation 8  = "aBCD"
388
// Permutation 15 = "abcd"
389
using key_type = StringIndex::key_type;
390
key_type generate_key(key_type upper, key_type lower, int permutation) {
3,122,892✔
391
    return select_from_mask(upper, lower, replicate_4_lsb_x8(permutation));
3,122,892✔
392
}
3,122,892✔
393

394

395
// Helper structure for IndexArray::index_string_all_ins to generate and keep track of search key permutations,
396
// when traversing the trees.
397
struct SearchList {
398
    struct Item {
399
        const char* header;
400
        size_t string_offset;
401
        key_type key;
402
    };
403

404
    SearchList(const util::Optional<std::string>& upper_value, const util::Optional<std::string>& lower_value)
405
        : m_upper_value(upper_value)
406
        , m_lower_value(lower_value)
407
    {
6,576✔
408
        m_keys_seen.reserve(num_permutations);
6,576✔
409
    }
6,576✔
410

411
    // Add all unique keys for this level to the internal work stack
412
    void add_all_for_level(const char* header, size_t string_offset)
413
    {
195,219✔
414
        m_keys_seen.clear();
195,219✔
415
        const key_type upper_key = StringIndex::create_key(m_upper_value, string_offset);
195,219✔
416
        const key_type lower_key = StringIndex::create_key(m_lower_value, string_offset);
195,219✔
417
        for (int p = 0; p < num_permutations; ++p) {
3,318,135✔
418
            // FIXME: This might still be incorrect due to multi-byte unicode characters (crossing the 4 byte key
1,561,140✔
419
            // size) being combined incorrectly.
1,561,140✔
420
            const key_type key = generate_key(upper_key, lower_key, p);
3,122,916✔
421
            const bool new_key = std::find(m_keys_seen.cbegin(), m_keys_seen.cend(), key) == m_keys_seen.cend();
3,122,916✔
422
            if (new_key) {
3,122,916✔
423
                m_keys_seen.push_back(key);
3,059,034✔
424
                add_next(header, string_offset, key);
3,059,034✔
425
            }
3,059,034✔
426
        }
3,122,916✔
427
    }
195,219✔
428

429
    bool empty() const
430
    {
3,065,580✔
431
        return m_items.empty();
3,065,580✔
432
    }
3,065,580✔
433

434
    Item get_next()
435
    {
3,059,112✔
436
        Item item = m_items.back();
3,059,112✔
437
        m_items.pop_back();
3,059,112✔
438
        return item;
3,059,112✔
439
    }
3,059,112✔
440

441
    // Add a single entry to the internal work stack. Used to traverse the inner trees (same key)
442
    void add_next(const char* header, size_t string_offset, key_type key)
443
    {
3,058,683✔
444
        m_items.push_back({header, string_offset, key});
3,058,683✔
445
    }
3,058,683✔
446

447
private:
448
    static constexpr int num_permutations = 1 << sizeof(key_type); // 4 bytes gives up to 16 search keys
449

450
    std::vector<Item> m_items;
451

452
    const util::Optional<std::string> m_upper_value;
453
    const util::Optional<std::string> m_lower_value;
454

455
    std::vector<key_type> m_keys_seen;
456
};
457

458

459
} // namespace
460

461

462
void IndexArray::index_string_all_ins(StringData value, std::vector<ObjKey>& result,
463
                                      const ClusterColumn& column) const
464
{
6,576✔
465
    REALM_ASSERT(!value.is_null());
6,576✔
466

3,288✔
467
    const util::Optional<std::string> upper_value = case_map(value, true);
6,576✔
468
    const util::Optional<std::string> lower_value = case_map(value, false);
6,576✔
469
    SearchList search_list(upper_value, lower_value);
6,576✔
470

3,288✔
471
    const char* top_header = get_header_from_data(m_data);
6,576✔
472
    search_list.add_all_for_level(top_header, 0);
6,576✔
473

3,288✔
474
    while (!search_list.empty()) {
3,065,718✔
475
        SearchList::Item item = search_list.get_next();
3,059,142✔
476

1,529,361✔
477
        const char* const header = item.header;
3,059,142✔
478
        const size_t string_offset = item.string_offset;
3,059,142✔
479
        const key_type key = item.key;
3,059,142✔
480
        const char* const data = get_data_from_header(header);
3,059,142✔
481
        const uint_least8_t width = get_width_from_header(header);
3,059,142✔
482
        const bool is_inner_node = get_is_inner_bptree_node_from_header(header);
3,059,142✔
483

1,529,361✔
484
        // Get subnode table
1,529,361✔
485
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
3,059,142✔
486

1,529,361✔
487
        // Find the position matching the key
1,529,361✔
488
        const char* const offsets_header = m_alloc.translate(offsets_ref);
3,059,142✔
489
        const char* const offsets_data = get_data_from_header(offsets_header);
3,059,142✔
490
        const size_t offsets_size = get_size_from_header(offsets_header);
3,059,142✔
491
        const size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
3,059,142✔
492

1,529,361✔
493
        // If key is outside range, we know there can be no match
1,529,361✔
494
        if (pos == offsets_size)
3,059,142✔
495
            continue;
810✔
496

1,528,956✔
497
        // Get entry under key
1,528,956✔
498
        const size_t pos_refs = pos + 1; // first entry in refs points to offsets
3,058,332✔
499
        const uint64_t ref = get_direct(data, width, pos_refs);
3,058,332✔
500

1,528,956✔
501
        if (is_inner_node) {
3,058,332✔
502
            // Set vars for next iteration
503
            const char* const inner_header = m_alloc.translate(to_ref(ref));
×
504
            search_list.add_next(inner_header, string_offset, key);
×
505
            continue;
×
506
        }
×
507

1,528,956✔
508
        const key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
3,058,332✔
509

1,528,956✔
510
        if (stored_key != key)
3,058,332✔
511
            continue;
2,862,555✔
512

97,923✔
513
        // Literal row index (tagged)
97,923✔
514
        if (ref & 1) {
195,777✔
515
            ObjKey k(int64_t(ref >> 1));
5,640✔
516

2,820✔
517
            // The buffer is needed when for when this is an integer index.
2,820✔
518
            StringConversionBuffer buffer;
5,640✔
519
            const StringData str = column.get_value(k).get_index_data(buffer);
5,640✔
520
            const util::Optional<std::string> upper_str = case_map(str, true);
5,640✔
521
            if (upper_str == upper_value) {
5,640✔
522
                result.push_back(k);
5,592✔
523
            }
5,592✔
524
            continue;
5,640✔
525
        }
5,640✔
526

95,103✔
527
        const char* const sub_header = m_alloc.translate(ref_type(ref));
190,137✔
528
        const bool sub_isindex = get_context_flag_from_header(sub_header);
190,137✔
529

95,103✔
530
        // List of row indices with common prefix up to this point, in sorted order.
95,103✔
531
        if (!sub_isindex) {
190,137✔
532
            const IntegerColumn sub(m_alloc, ref_type(ref));
1,422✔
533
            from_list_all_ins(upper_value, result, sub, column);
1,422✔
534
            continue;
1,422✔
535
        }
1,422✔
536

94,392✔
537
        // Recurse into sub-index;
94,392✔
538
        search_list.add_all_for_level(sub_header, string_offset + 4);
188,715✔
539
    }
188,715✔
540

3,288✔
541
    // sort the result and return a std::vector
3,288✔
542
    std::sort(result.begin(), result.end());
6,576✔
543
}
6,576✔
544

545

546
void IndexArray::index_string_all(Mixed value, std::vector<ObjKey>& result, const ClusterColumn& column) const
547
{
49,635✔
548
    const char* data = m_data;
49,635✔
549
    const char* header;
49,635✔
550
    uint_least8_t width = m_width;
49,635✔
551
    bool is_inner_node = m_is_inner_bptree_node;
49,635✔
552
    size_t stringoffset = 0;
49,635✔
553

24,294✔
554
    StringConversionBuffer buffer;
49,635✔
555
    StringData index_data = value.get_index_data(buffer);
49,635✔
556
    // Create 4 byte index key
24,294✔
557
    key_type key = StringIndex::create_key(index_data, stringoffset);
49,635✔
558

24,294✔
559
    for (;;) {
80,091✔
560
        // Get subnode table
38,952✔
561
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
80,091✔
562

38,952✔
563
        // Find the position matching the key
38,952✔
564
        const char* offsets_header = m_alloc.translate(offsets_ref);
80,091✔
565
        const char* offsets_data = get_data_from_header(offsets_header);
80,091✔
566
        size_t offsets_size = get_size_from_header(offsets_header);
80,091✔
567
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
80,091✔
568

38,952✔
569
        // If key is outside range, we know there can be no match
38,952✔
570
        if (pos == offsets_size)
80,091✔
571
            return;
×
572

38,952✔
573
        // Get entry under key
38,952✔
574
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
80,091✔
575
        uint64_t ref = get_direct(data, width, pos_refs);
80,091✔
576

38,952✔
577
        if (is_inner_node) {
80,091✔
578
            // Set vars for next iteration
579
            header = m_alloc.translate(ref_type(ref));
×
580
            data = get_data_from_header(header);
×
581
            width = get_width_from_header(header);
×
582
            is_inner_node = get_is_inner_bptree_node_from_header(header);
×
583
            continue;
×
584
        }
×
585

38,952✔
586
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
80,091✔
587

38,952✔
588
        if (stored_key != key)
80,091✔
589
            return;
6,120✔
590

35,892✔
591
        // Literal row index (tagged)
35,892✔
592
        if (ref & 1) {
73,971✔
593
            ObjKey k(int64_t(ref >> 1));
2,154✔
594

1,008✔
595
            Mixed a = column.get_value(k);
2,154✔
596
            if (a == value) {
2,154✔
597
                result.push_back(k);
2,154✔
598
                return;
2,154✔
599
            }
2,154✔
600
            return;
×
601
        }
×
602

34,884✔
603
        const char* sub_header = m_alloc.translate(ref_type(ref));
71,817✔
604
        const bool sub_isindex = get_context_flag_from_header(sub_header);
71,817✔
605

34,884✔
606
        // List of row indices with common prefix up to this point, in sorted order.
34,884✔
607
        if (!sub_isindex) {
71,817✔
608
            const IntegerColumn sub(m_alloc, ref_type(ref));
41,361✔
609
            return from_list_all(value, result, sub, column);
41,361✔
610
        }
41,361✔
611

14,658✔
612
        // Recurse into sub-index;
14,658✔
613
        header = sub_header;
30,456✔
614
        data = get_data_from_header(header);
30,456✔
615
        width = get_width_from_header(header);
30,456✔
616
        is_inner_node = get_is_inner_bptree_node_from_header(header);
30,456✔
617

14,658✔
618
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
14,658✔
619
        stringoffset += 4;
30,456✔
620

14,658✔
621
        // Update 4 byte index key
14,658✔
622
        key = StringIndex::create_key(index_data, stringoffset);
30,456✔
623
    }
30,456✔
624
}
49,635✔
625

626
static void get_all_keys_below(std::set<int64_t>& result, ref_type ref, Allocator& alloc)
627
{
72✔
628
    const char* sub_header = alloc.translate(ref_type(ref));
72✔
629
    const bool sub_isindex = NodeHeader::get_context_flag_from_header(sub_header);
72✔
630

36✔
631
    if (sub_isindex) {
72✔
632
        Array tree(alloc);
36✔
633
        tree.init_from_ref(ref);
36✔
634
        auto sz = tree.size();
36✔
635
        for (size_t n = 1; n < sz; n++) {
90✔
636
            auto rot = tree.get_as_ref_or_tagged(n);
54✔
637
            // Literal row index (tagged)
27✔
638
            if (rot.is_tagged()) {
54✔
639
                result.insert(rot.get_as_int());
30✔
640
            }
30✔
641
            else {
24✔
642
                get_all_keys_below(result, rot.get_as_ref(), alloc);
24✔
643
            }
24✔
644
        }
54✔
645
    }
36✔
646
    else {
36✔
647
        IntegerColumn tree(alloc, ref);
36✔
648
        tree.for_all([&result](int64_t i) {
84✔
649
            result.insert(i);
84✔
650
        });
84✔
651
    }
36✔
652
}
72✔
653

654
void IndexArray::_index_string_find_all_prefix(std::set<int64_t>& result, StringData str, const char* header) const
655
{
48✔
656
    size_t stringoffset = 0;
48✔
657

24✔
658
    for (;;) {
72✔
659
        const char* data = NodeHeader::get_data_from_header(header);
72✔
660
        uint_least8_t width = get_width_from_header(header);
72✔
661

36✔
662
        // Create 4 byte lower and upper key
36✔
663
        key_type lower = 0;
72✔
664
        size_t i = 0;
72✔
665
        size_t n = str.size() - stringoffset;
72✔
666
        bool is_at_string_end = (n <= 4);
72✔
667
        if (!is_at_string_end) {
72✔
668
            n = 4;
24✔
669
        }
24✔
670
        while (i < n) {
282✔
671
            lower <<= 8;
210✔
672
            lower += str[stringoffset + i++];
210✔
673
        }
210✔
674
        size_t shift = (4 - i) * 8;
72✔
675
        key_type upper = lower + 1;
72✔
676
        lower <<= shift;
72✔
677
        upper <<= shift;
72✔
678
        upper--;
72✔
679

36✔
680
        // Get index array
36✔
681
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
72✔
682

36✔
683
        // Find the position matching the key
36✔
684
        const char* offsets_header = m_alloc.translate(offsets_ref);
72✔
685
        const char* offsets_data = get_data_from_header(offsets_header);
72✔
686
        size_t offsets_size = get_size_from_header(offsets_header);
72✔
687
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, lower); // keys are always 32 bits wide
72✔
688
        // If key is outside range, we know there can be no match
36✔
689
        if (pos == offsets_size)
72✔
690
            return;
×
691

36✔
692
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
72✔
693

36✔
694
        if (NodeHeader::get_is_inner_bptree_node_from_header(header)) {
72✔
695
            bool done = false;
×
696
            while (!done) {
×
697
                // Recursively call with child node
698
                const char* header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs++)));
×
699
                _index_string_find_all_prefix(result, str, header);
×
700

701
                // Check if current node is past end of key range or last node
702
                auto key = get_direct<32>(offsets_data, pos++);
×
703
                done = key > upper || pos == offsets_size;
×
704
            }
×
705
            return;
×
706
        }
×
707

36✔
708
        size_t end = ::upper_bound<32>(offsets_data, offsets_size, upper); // keys are always 32 bits wide
72✔
709
        if (is_at_string_end) {
72✔
710
            // now get all entries from start to end
24✔
711
            for (size_t ndx = pos_refs; ndx <= end; ndx++) {
108✔
712
                uint64_t ref = get_direct(data, width, ndx);
60✔
713
                // Literal row index (tagged)
30✔
714
                if (ref & 1) {
60✔
715
                    result.emplace(int64_t(ref >> 1));
12✔
716
                }
12✔
717
                else {
48✔
718
                    get_all_keys_below(result, to_ref(ref), m_alloc);
48✔
719
                }
48✔
720
            }
60✔
721
            return;
48✔
722
        }
48✔
723

12✔
724
        // When we are not at end of string then we are comparing against the whole key
12✔
725
        // and we can have at most one match
12✔
726
        REALM_ASSERT(end == pos + 1);
24✔
727
        header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs)));
24✔
728
        stringoffset += 4;
24✔
729
    }
24✔
730
}
48✔
731

732
ObjKey IndexArray::index_string_find_first(Mixed value, const ClusterColumn& column) const
733
{
1,592,853✔
734
    InternalFindResult unused;
1,592,853✔
735
    return ObjKey(index_string<index_FindFirst>(value, unused, column));
1,592,853✔
736
}
1,592,853✔
737

738

739
void IndexArray::index_string_find_all(std::vector<ObjKey>& result, Mixed value, const ClusterColumn& column,
740
                                       bool case_insensitive) const
741
{
56,211✔
742
    if (case_insensitive && value.is_type(type_String)) {
56,211✔
743
        index_string_all_ins(value.get_string(), result, column);
6,576✔
744
    }
6,576✔
745
    else {
49,635✔
746
        index_string_all(value, result, column);
49,635✔
747
    }
49,635✔
748
}
56,211✔
749

750
FindRes IndexArray::index_string_find_all_no_copy(Mixed value, const ClusterColumn& column,
751
                                                  InternalFindResult& result) const
752
{
654,900✔
753
    return static_cast<FindRes>(index_string<index_FindAll_nocopy>(value, result, column));
654,900✔
754
}
654,900✔
755

756
size_t IndexArray::index_string_count(Mixed value, const ClusterColumn& column) const
757
{
12,864✔
758
    InternalFindResult unused;
12,864✔
759
    return to_size_t(index_string<index_Count>(value, unused, column));
12,864✔
760
}
12,864✔
761

762
std::unique_ptr<IndexArray> StringIndex::create_node(Allocator& alloc, bool is_leaf)
763
{
179,334✔
764
    Array::Type type = is_leaf ? Array::type_HasRefs : Array::type_InnerBptreeNode;
179,295✔
765
    std::unique_ptr<IndexArray> top(new IndexArray(alloc)); // Throws
179,334✔
766
    top->create(type);                                      // Throws
179,334✔
767

88,707✔
768
    // Mark that this is part of index
88,707✔
769
    // (as opposed to columns under leaves)
88,707✔
770
    top->set_context_flag(true);
179,334✔
771

88,707✔
772
    // Add subcolumns for leaves
88,707✔
773
    Array values(alloc);
179,334✔
774
    values.create(Array::type_Normal);       // Throws
179,334✔
775
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
179,334✔
776
    top->add(values.get_ref());              // first entry in refs points to offsets
179,334✔
777

88,707✔
778
    return top;
179,334✔
779
}
179,334✔
780

781
void StringIndex::set_target(const ClusterColumn& target_column) noexcept
782
{
×
783
    m_target_column = target_column;
×
784
}
×
785

786

787
StringIndex::key_type StringIndex::get_last_key() const
788
{
54,843✔
789
    Array offsets(m_array->get_alloc());
54,843✔
790
    get_child(*m_array, 0, offsets);
54,843✔
791
    return key_type(offsets.back());
54,843✔
792
}
54,843✔
793

794

795
void StringIndex::insert_with_offset(ObjKey obj_key, StringData index_data, const Mixed& value, size_t offset)
796
{
9,267,354✔
797
    // Create 4 byte index key
4,618,689✔
798
    key_type key = create_key(index_data, offset);
9,267,354✔
799
    TreeInsert(obj_key, key, offset, index_data, value); // Throws
9,267,354✔
800
}
9,267,354✔
801

802
void StringIndex::insert_to_existing_list_at_lower(ObjKey key, Mixed value, IntegerColumn& list,
803
                                                   const IntegerColumnIterator& lower)
804
{
2,063,835✔
805
    SortedListComparator slc(m_target_column);
2,063,835✔
806
    // At this point there exists duplicates of this value, we need to
1,031,943✔
807
    // insert value beside it's duplicates so that rows are also sorted
1,031,943✔
808
    // in ascending order.
1,031,943✔
809
    IntegerColumn::const_iterator upper = std::upper_bound(lower, list.cend(), value, slc);
2,063,835✔
810
    // find insert position (the list has to be kept in sorted order)
1,031,943✔
811
    // In most cases the refs will be added to the end. So we test for that
1,031,943✔
812
    // first to see if we can avoid the binary search for insert position
1,031,943✔
813
    IntegerColumn::const_iterator last = upper - ptrdiff_t(1);
2,063,835✔
814
    int64_t last_key_value = *last;
2,063,835✔
815
    if (key.value >= last_key_value) {
2,063,835✔
816
        list.insert(upper.get_position(), key.value);
2,028,930✔
817
    }
2,028,930✔
818
    else {
34,905✔
819
        IntegerColumn::const_iterator inner_lower = std::lower_bound(lower, upper, key.value);
34,905✔
820
        list.insert(inner_lower.get_position(), key.value);
34,905✔
821
    }
34,905✔
822
}
2,063,835✔
823

824
void StringIndex::insert_to_existing_list(ObjKey key, Mixed value, IntegerColumn& list)
825
{
1,950✔
826
    SortedListComparator slc(m_target_column);
1,950✔
827
    IntegerColumn::const_iterator it_end = list.cend();
1,950✔
828
    IntegerColumn::const_iterator lower = std::lower_bound(list.cbegin(), it_end, value, slc);
1,950✔
829

1,014✔
830
    if (lower == it_end) {
1,950✔
831
        // Not found and everything is less, just append it to the end.
747✔
832
        list.add(key.value);
1,494✔
833
    }
1,494✔
834
    else {
456✔
835
        ObjKey lower_key = ObjKey(*lower);
456✔
836
        Mixed lower_value = get(lower_key);
456✔
837

267✔
838
        if (lower_value != value) {
456✔
839
            list.insert(lower.get_position(), key.value);
456✔
840
        }
456✔
841
        else {
×
842
            // At this point there exists duplicates of this value, we need to
843
            // insert value beside it's duplicates so that rows are also sorted
844
            // in ascending order.
845
            insert_to_existing_list_at_lower(key, value, list, lower);
×
846
        }
×
847
    }
456✔
848
}
1,950✔
849

850

851
void StringIndex::insert_row_list(size_t ref, size_t offset, StringData index_data)
852
{
2,088✔
853
    REALM_ASSERT(!m_array->is_inner_bptree_node()); // only works in leaves
2,088✔
854

621✔
855
    // Create 4 byte index key
621✔
856
    key_type key = create_key(index_data, offset);
2,088✔
857

621✔
858
    // Get subnode table
621✔
859
    Allocator& alloc = m_array->get_alloc();
2,088✔
860
    Array values(alloc);
2,088✔
861
    get_child(*m_array, 0, values);
2,088✔
862
    REALM_ASSERT(m_array->size() == values.size() + 1);
2,088✔
863

621✔
864
    size_t ins_pos = values.lower_bound_int(key);
2,088✔
865
    if (ins_pos == values.size()) {
2,088✔
866
        // When key is outside current range, we can just add it
621✔
867
        values.add(key);
2,088✔
868
        m_array->add(ref);
2,088✔
869
        return;
2,088✔
870
    }
2,088✔
871

872
#ifdef REALM_DEBUG // LCOV_EXCL_START ignore debug code
×
873
    // Since we only use this for moving existing values to new
874
    // subindexes, there should never be an existing match.
875
    key_type k = key_type(values.get(ins_pos));
×
876
    REALM_ASSERT(k != key);
×
877
#endif // LCOV_EXCL_STOP ignore debug code
×
878

879
    // If key is not present we add it at the correct location
880
    values.insert(ins_pos, key);
×
881
    m_array->insert(ins_pos + 1, ref);
×
882
}
×
883

884

885
void StringIndex::TreeInsert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value)
886
{
9,268,101✔
887
    NodeChange nc = do_insert(obj_key, key, offset, index_data, value);
9,268,101✔
888
    switch (nc.type) {
9,268,101✔
889
        case NodeChange::change_None:
9,265,956✔
890
            return;
9,265,956✔
891
        case NodeChange::change_InsertBefore: {
✔
892
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
×
893
            new_node.node_add_key(nc.ref1);
×
894
            new_node.node_add_key(get_ref());
×
895
            m_array->init_from_ref(new_node.get_ref());
×
896
            m_array->update_parent();
×
897
            return;
×
898
        }
×
899
        case NodeChange::change_InsertAfter: {
9✔
900
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
9✔
901
            new_node.node_add_key(get_ref());
9✔
902
            new_node.node_add_key(nc.ref1);
9✔
903
            m_array->init_from_ref(new_node.get_ref());
9✔
904
            m_array->update_parent();
9✔
905
            return;
9✔
906
        }
×
907
        case NodeChange::change_Split: {
63✔
908
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
63✔
909
            new_node.node_add_key(nc.ref1);
63✔
910
            new_node.node_add_key(nc.ref2);
63✔
911
            m_array->init_from_ref(new_node.get_ref());
63✔
912
            m_array->update_parent();
63✔
913
            return;
63✔
914
        }
×
915
    }
×
916
    REALM_ASSERT(false); // LCOV_EXCL_LINE; internal Realm error
917
}
×
918

919

920
StringIndex::NodeChange StringIndex::do_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data,
921
                                               const Mixed& value)
922
{
9,321,468✔
923
    Allocator& alloc = m_array->get_alloc();
9,321,468✔
924
    if (m_array->is_inner_bptree_node()) {
9,321,468✔
925
        // Get subnode table
18,318✔
926
        Array keys(alloc);
50,064✔
927
        get_child(*m_array, 0, keys);
50,064✔
928
        REALM_ASSERT(m_array->size() == keys.size() + 1);
50,064✔
929

18,318✔
930
        // Find the subnode containing the item
18,318✔
931
        size_t node_ndx = keys.lower_bound_int(key);
50,064✔
932
        if (node_ndx == keys.size()) {
50,064✔
933
            // node can never be empty, so try to fit in last item
4,944✔
934
            node_ndx = keys.size() - 1;
14,283✔
935
        }
14,283✔
936

18,318✔
937
        // Get sublist
18,318✔
938
        size_t refs_ndx = node_ndx + 1; // first entry in refs points to offsets
50,064✔
939
        ref_type ref = m_array->get_as_ref(refs_ndx);
50,064✔
940
        StringIndex target(ref, m_array.get(), refs_ndx, m_target_column, alloc);
50,064✔
941

18,318✔
942
        // Insert item
18,318✔
943
        NodeChange nc = target.do_insert(obj_key, key, offset, index_data, value);
50,064✔
944
        if (nc.type == NodeChange::change_None) {
50,064✔
945
            // update keys
18,309✔
946
            key_type last_key = target.get_last_key();
50,031✔
947
            keys.set(node_ndx, last_key);
50,031✔
948
            return NodeChange::change_None; // no new nodes
50,031✔
949
        }
50,031✔
950

9✔
951
        if (nc.type == NodeChange::change_InsertAfter) {
33✔
952
            ++node_ndx;
12✔
953
            ++refs_ndx;
12✔
954
        }
12✔
955

9✔
956
        // If there is room, just update node directly
9✔
957
        if (keys.size() < REALM_MAX_BPNODE_SIZE) {
33✔
958
            if (nc.type == NodeChange::change_Split) {
33✔
959
                node_insert_split(node_ndx, nc.ref2);
21✔
960
            }
21✔
961
            else {
12✔
962
                node_insert(node_ndx, nc.ref1); // ::INSERT_BEFORE/AFTER
12✔
963
            }
12✔
964
            return NodeChange::change_None;
33✔
965
        }
33✔
966

967
        // Else create new node
UNCOV
968
        StringIndex new_node(inner_node_tag(), alloc);
×
UNCOV
969
        if (nc.type == NodeChange::change_Split) {
×
970
            // update offset for left node
971
            key_type last_key = target.get_last_key();
×
972
            keys.set(node_ndx, last_key);
×
973

974
            new_node.node_add_key(nc.ref2);
×
975
            ++node_ndx;
×
976
            ++refs_ndx;
×
977
        }
×
UNCOV
978
        else {
×
UNCOV
979
            new_node.node_add_key(nc.ref1);
×
UNCOV
980
        }
×
981

UNCOV
982
        switch (node_ndx) {
×
983
            case 0: // insert before
×
984
                return NodeChange(NodeChange::change_InsertBefore, new_node.get_ref());
×
985
            case REALM_MAX_BPNODE_SIZE: // insert after
×
986
                if (nc.type == NodeChange::change_Split)
×
987
                    return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
×
988
                return NodeChange(NodeChange::change_InsertAfter, new_node.get_ref());
×
989
            default: // split
×
990
                // Move items after split to new node
991
                size_t len = m_array->size();
×
992
                for (size_t i = refs_ndx; i < len; ++i) {
×
993
                    ref_type ref_i = m_array->get_as_ref(i);
×
994
                    new_node.node_add_key(ref_i);
×
995
                }
×
996
                keys.truncate(node_ndx);
×
997
                m_array->truncate(refs_ndx);
×
998
                return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
×
999
        }
9,271,404✔
1000
    }
9,271,404✔
1001
    else {
9,271,404✔
1002
        // Is there room in the list?
4,621,743✔
1003
        Array old_keys(alloc);
9,271,404✔
1004
        get_child(*m_array, 0, old_keys);
9,271,404✔
1005
        const size_t old_offsets_size = old_keys.size();
9,271,404✔
1006
        REALM_ASSERT_EX(m_array->size() == old_offsets_size + 1, m_array->size(), old_offsets_size + 1);
9,271,404✔
1007

4,621,743✔
1008
        bool noextend = old_offsets_size >= REALM_MAX_BPNODE_SIZE;
9,271,404✔
1009

4,621,743✔
1010
        // See if we can fit entry into current leaf
4,621,743✔
1011
        // Works if there is room or it can join existing entries
4,621,743✔
1012
        if (leaf_insert(obj_key, key, offset, index_data, value, noextend))
9,271,404✔
1013
            return NodeChange::change_None;
9,266,526✔
1014

1,104✔
1015
        // Create new list for item (a leaf)
1,104✔
1016
        StringIndex new_list(m_target_column, alloc);
4,878✔
1017

1,104✔
1018
        new_list.leaf_insert(obj_key, key, offset, index_data, value);
4,878✔
1019

1,104✔
1020
        size_t ndx = old_keys.lower_bound_int(key);
4,878✔
1021

1,104✔
1022
        // insert before
1,104✔
1023
        if (ndx == 0)
4,878✔
1024
            return NodeChange(NodeChange::change_InsertBefore, new_list.get_ref());
×
1025

1,104✔
1026
        // insert after
1,104✔
1027
        if (ndx == old_offsets_size)
4,878✔
1028
            return NodeChange(NodeChange::change_InsertAfter, new_list.get_ref());
21✔
1029

1,095✔
1030
        // split
1,095✔
1031
        Array new_keys(alloc);
4,857✔
1032
        get_child(*new_list.m_array, 0, new_keys);
4,857✔
1033
        // Move items after split to new list
1,095✔
1034
        for (size_t i = ndx; i < old_offsets_size; ++i) {
45,222✔
1035
            int64_t v2 = old_keys.get(i);
40,365✔
1036
            int64_t v3 = m_array->get(i + 1);
40,365✔
1037

16,623✔
1038
            new_keys.add(v2);
40,365✔
1039
            new_list.m_array->add(v3);
40,365✔
1040
        }
40,365✔
1041
        old_keys.truncate(ndx);
4,857✔
1042
        m_array->truncate(ndx + 1);
4,857✔
1043

1,095✔
1044
        return NodeChange(NodeChange::change_Split, get_ref(), new_list.get_ref());
4,857✔
1045
    }
4,857✔
1046
}
9,321,468✔
1047

1048

1049
void StringIndex::node_insert_split(size_t ndx, size_t new_ref)
1050
{
21✔
1051
    REALM_ASSERT(m_array->is_inner_bptree_node());
21✔
1052
    REALM_ASSERT(new_ref);
21✔
1053

6✔
1054
    Allocator& alloc = m_array->get_alloc();
21✔
1055
    Array offsets(alloc);
21✔
1056
    get_child(*m_array, 0, offsets);
21✔
1057

6✔
1058
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
21✔
1059
    REALM_ASSERT(ndx < offsets.size());
21✔
1060
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
21✔
1061

6✔
1062
    // Get sublists
6✔
1063
    size_t refs_ndx = ndx + 1; // first entry in refs points to offsets
21✔
1064
    ref_type orig_ref = m_array->get_as_ref(refs_ndx);
21✔
1065
    StringIndex orig_col(orig_ref, m_array.get(), refs_ndx, m_target_column, alloc);
21✔
1066
    StringIndex new_col(new_ref, nullptr, 0, m_target_column, alloc);
21✔
1067

6✔
1068
    // Update original key
6✔
1069
    key_type last_key = orig_col.get_last_key();
21✔
1070
    offsets.set(ndx, last_key);
21✔
1071

6✔
1072
    // Insert new ref
6✔
1073
    key_type new_key = new_col.get_last_key();
21✔
1074
    offsets.insert(ndx + 1, new_key);
21✔
1075
    m_array->insert(ndx + 2, new_ref);
21✔
1076
}
21✔
1077

1078

1079
void StringIndex::node_insert(size_t ndx, size_t ref)
1080
{
12✔
1081
    REALM_ASSERT(ref);
12✔
1082
    REALM_ASSERT(m_array->is_inner_bptree_node());
12✔
1083

3✔
1084
    Allocator& alloc = m_array->get_alloc();
12✔
1085
    Array offsets(alloc);
12✔
1086
    get_child(*m_array, 0, offsets);
12✔
1087
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
12✔
1088

3✔
1089
    REALM_ASSERT(ndx <= offsets.size());
12✔
1090
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
12✔
1091

3✔
1092
    StringIndex col(ref, nullptr, 0, m_target_column, alloc);
12✔
1093
    key_type last_key = col.get_last_key();
12✔
1094

3✔
1095
    offsets.insert(ndx, last_key);
12✔
1096
    m_array->insert(ndx + 1, ref);
12✔
1097
}
12✔
1098

1099
bool StringIndex::leaf_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value,
1100
                              bool noextend)
1101
{
9,259,350✔
1102
    REALM_ASSERT(!m_array->is_inner_bptree_node());
9,259,350✔
1103

4,613,670✔
1104
    // Get subnode table
4,613,670✔
1105
    Allocator& alloc = m_array->get_alloc();
9,259,350✔
1106
    Array keys(alloc);
9,259,350✔
1107
    get_child(*m_array, 0, keys);
9,259,350✔
1108
    REALM_ASSERT(m_array->size() == keys.size() + 1);
9,259,350✔
1109

4,613,670✔
1110
    // If we are keeping the complete string in the index
4,613,670✔
1111
    // we want to know if this is the last part
4,613,670✔
1112
    bool is_at_string_end = offset + 4 >= index_data.size();
9,259,350✔
1113

4,613,670✔
1114
    size_t ins_pos = keys.lower_bound_int(key);
9,259,350✔
1115
    size_t ins_pos_refs = ins_pos + 1; // first entry in refs points to offsets
9,259,350✔
1116

4,613,670✔
1117
    if (ins_pos == keys.size()) {
9,259,350✔
1118
        if (noextend)
336,534✔
1119
            return false;
21✔
1120

165,741✔
1121
        // When key is outside current range, we can just add it
165,741✔
1122
        keys.add(key);
336,513✔
1123
        if (!m_target_column.is_fulltext() || is_at_string_end) {
336,513✔
1124
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
335,688✔
1125
            m_array->add(shifted);
335,688✔
1126
        }
335,688✔
1127
        else {
825✔
1128
            // create subindex for rest of string
423✔
1129
            StringIndex subindex(m_target_column, m_array->get_alloc());
825✔
1130
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
825✔
1131
            m_array->add(subindex.get_ref());
825✔
1132
        }
825✔
1133
        return true;
336,513✔
1134
    }
336,513✔
1135

4,447,920✔
1136
    key_type k = key_type(keys.get(ins_pos));
8,922,816✔
1137

4,447,920✔
1138
    // If key is not present we add it at the correct location
4,447,920✔
1139
    if (k != key) {
8,922,816✔
1140
        if (noextend)
723,738✔
1141
            return false;
84✔
1142

352,872✔
1143
        keys.insert(ins_pos, key);
723,654✔
1144
        if (!m_target_column.is_fulltext() || is_at_string_end) {
723,654✔
1145
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
722,037✔
1146
            m_array->insert(ins_pos_refs, shifted);
722,037✔
1147
        }
722,037✔
1148
        else {
1,617✔
1149
            // create subindex for rest of string
762✔
1150
            StringIndex subindex(m_target_column, m_array->get_alloc());
1,617✔
1151
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
1,617✔
1152
            m_array->insert(ins_pos_refs, subindex.get_ref());
1,617✔
1153
        }
1,617✔
1154
        return true;
723,654✔
1155
    }
723,654✔
1156

4,095,015✔
1157
    // This leaf already has a slot for for the key
4,095,015✔
1158

4,095,015✔
1159
    uint64_t slot_value = uint64_t(m_array->get(ins_pos_refs));
8,199,078✔
1160
    size_t suboffset = offset + s_index_key_length;
8,199,078✔
1161

4,095,015✔
1162
    // Single match (lowest bit set indicates literal row_ndx)
4,095,015✔
1163
    if ((slot_value & 1) != 0) {
8,199,078✔
1164
        ObjKey obj_key2 = ObjKey(int64_t(slot_value >> 1));
51,819✔
1165
        Mixed v2 = m_target_column.is_fulltext() ? reconstruct_string(offset, key, index_data) : get(obj_key2);
51,537✔
1166
        if (v2 == value) {
51,819✔
1167
            // Strings are equal but this is not a list.
4,776✔
1168
            // Create a list and add both rows.
4,776✔
1169

4,776✔
1170
            // convert to list (in sorted order)
4,776✔
1171
            Array row_list(alloc);
9,627✔
1172
            row_list.create(Array::type_Normal); // Throws
9,627✔
1173
            row_list.add(obj_key < obj_key2 ? obj_key.value : obj_key2.value);
8,994✔
1174
            row_list.add(obj_key < obj_key2 ? obj_key2.value : obj_key.value);
8,994✔
1175
            m_array->set(ins_pos_refs, row_list.get_ref());
9,627✔
1176
        }
9,627✔
1177
        else {
42,192✔
1178
            StringConversionBuffer buffer;
42,192✔
1179
            auto index_data_2 = v2.get_index_data(buffer);
42,192✔
1180
            if (index_data == index_data_2 || suboffset > s_max_offset) {
42,192✔
1181
                // These strings have the same prefix up to this point but we
303✔
1182
                // don't want to recurse further, create a list in sorted order.
303✔
1183
                bool row_ndx_first = value.compare_signed(v2) < 0;
579✔
1184
                Array row_list(alloc);
579✔
1185
                row_list.create(Array::type_Normal); // Throws
579✔
1186
                row_list.add(row_ndx_first ? obj_key.value : obj_key2.value);
456✔
1187
                row_list.add(row_ndx_first ? obj_key2.value : obj_key.value);
456✔
1188
                m_array->set(ins_pos_refs, row_list.get_ref());
579✔
1189
            }
579✔
1190
            else {
41,613✔
1191
                // These strings have the same prefix up to this point but they
20,826✔
1192
                // are actually not equal. Extend the tree recursivly until the
20,826✔
1193
                // prefix of these strings is different.
20,826✔
1194
                StringIndex subindex(m_target_column, m_array->get_alloc());
41,613✔
1195
                subindex.insert_with_offset(obj_key2, index_data_2, v2, suboffset);
41,613✔
1196
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
41,613✔
1197
                // Join the string of SubIndices to the current position of m_array
20,826✔
1198
                m_array->set(ins_pos_refs, subindex.get_ref());
41,613✔
1199
            }
41,613✔
1200
        }
42,192✔
1201
        return true;
51,819✔
1202
    }
51,819✔
1203

4,069,110✔
1204
    // If there already is a list of matches, we see if we fit there
4,069,110✔
1205
    // or it has to be split into a subindex
4,069,110✔
1206
    ref_type ref = ref_type(slot_value);
8,147,259✔
1207
    char* header = alloc.translate(ref);
8,147,259✔
1208
    if (!Array::get_context_flag_from_header(header)) {
8,147,259✔
1209
        IntegerColumn sub(alloc, ref); // Throws
2,066,862✔
1210
        sub.set_parent(m_array.get(), ins_pos_refs);
2,066,862✔
1211

1,032,777✔
1212
        IntegerColumn::const_iterator it_end = sub.cend();
2,066,862✔
1213
        IntegerColumn::const_iterator lower = it_end;
2,066,862✔
1214

1,032,777✔
1215
        auto value_exists_in_list = [&]() {
2,067,744✔
1216
            if (m_target_column.is_fulltext()) {
2,067,744✔
1217
                return reconstruct_string(offset, key, index_data) == value.get_string();
348✔
1218
            }
348✔
1219
            SortedListComparator slc(m_target_column);
2,067,396✔
1220
            lower = std::lower_bound(sub.cbegin(), it_end, value, slc);
2,067,396✔
1221

1,033,248✔
1222
            if (lower != it_end) {
2,067,396✔
1223
                Mixed lower_value = get(ObjKey(*lower));
2,064,417✔
1224
                if (lower_value == value) {
2,064,417✔
1225
                    return true;
2,063,496✔
1226
                }
2,063,496✔
1227
            }
3,900✔
1228
            return false;
3,900✔
1229
        };
3,900✔
1230

1,032,777✔
1231
        // If we found the value in this list, add the duplicate to the list.
1,032,777✔
1232
        if (value_exists_in_list()) {
2,066,862✔
1233
            insert_to_existing_list_at_lower(obj_key, value, sub, lower);
2,063,862✔
1234
        }
2,063,862✔
1235
        else {
3,000✔
1236
            // If the list only stores duplicates we are free to branch and
834✔
1237
            // and create a sub index with this existing list as one of the
834✔
1238
            // leafs, but if the list doesn't only contain duplicates we
834✔
1239
            // must respect that we store a common key prefix up to this
834✔
1240
            // point and insert into the existing list.
834✔
1241
            ObjKey key_of_any_dup = ObjKey(sub.get(0));
3,000✔
1242
            StringConversionBuffer buffer;
3,000✔
1243
            StringData index_data_2 = m_target_column.is_fulltext() ? reconstruct_string(offset, key, index_data)
837✔
1244
                                                                    : get(key_of_any_dup).get_index_data(buffer);
2,997✔
1245
            if (index_data == index_data_2 || suboffset > s_max_offset) {
4,032✔
1246
                insert_to_existing_list(obj_key, value, sub);
1,950✔
1247
            }
1,950✔
1248
            else {
2,147,484,877✔
1249
#ifdef REALM_DEBUG
2,147,484,877✔
1250
                bool contains_only_duplicates = true;
2,147,484,877✔
1251
                if (!m_target_column.is_fulltext() && sub.size() > 1) {
2,147,485,111✔
1252
                    ObjKey first_key = ObjKey(sub.get(0));
1,272✔
1253
                    ObjKey last_key = ObjKey(sub.back());
1,272✔
1254
                    auto first = get(first_key);
1,272✔
1255
                    auto last = get(last_key);
1,272✔
1256
                    // Since the list is kept in sorted order, the first and
426✔
1257
                    // last values will be the same only if the whole list is
426✔
1258
                    // storing duplicate values.
426✔
1259
                    if (first != last) {
1,272✔
1260
                        contains_only_duplicates = false; // LCOV_EXCL_LINE
1261
                    }
×
1262
                }
1,272✔
1263
                REALM_ASSERT_DEBUG(contains_only_duplicates);
2,147,484,877✔
1264
#endif
2,147,484,877✔
1265
                // The buffer is needed for when this is an integer index.
2,147,483,647✔
1266
                StringIndex subindex(m_target_column, m_array->get_alloc());
2,147,484,877✔
1267
                subindex.insert_row_list(sub.get_ref(), suboffset, index_data_2);
2,147,484,877✔
1268
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
2,147,484,877✔
1269
                m_array->set(ins_pos_refs, subindex.get_ref());
2,147,484,877✔
1270
            }
2,147,484,877✔
1271
        }
3,000✔
1272
        return true;
2,066,862✔
1273
    }
2,066,862✔
1274

3,036,333✔
1275
    // The key matches, but there is a subindex here so go down a level in the tree.
3,036,333✔
1276
    StringIndex subindex(ref, m_array.get(), ins_pos_refs, m_target_column, alloc);
6,080,397✔
1277
    subindex.insert_with_offset(obj_key, index_data, value, suboffset);
6,080,397✔
1278

3,036,333✔
1279
    return true;
6,080,397✔
1280
}
6,080,397✔
1281

1282
Mixed StringIndex::get(ObjKey key) const
1283
{
3,697,482✔
1284
    return m_target_column.get_value(key);
3,697,482✔
1285
}
3,697,482✔
1286

1287
void StringIndex::erase(ObjKey key)
1288
{
915,087✔
1289
    StringConversionBuffer buffer;
915,087✔
1290
    std::string_view value{(get(key).get_index_data(buffer))};
915,087✔
1291
    if (m_target_column.is_fulltext()) {
915,087✔
1292
        auto words = Tokenizer::get_instance()->reset(value).get_all_tokens();
72✔
1293
        for (auto& w : words) {
2,460✔
1294
            erase_string(key, w);
2,460✔
1295
        }
2,460✔
1296
    }
72✔
1297
    else {
915,015✔
1298
        erase_string(key, value);
915,015✔
1299
    }
915,015✔
1300
}
915,087✔
1301

1302
namespace {
1303
template <typename T>
1304
void intersect(std::vector<ObjKey>& result, T& keys)
1305
{
276✔
1306
    if (result.empty()) {
276✔
1307
        result.reserve(keys.size());
180✔
1308
        for (auto k : keys) {
498✔
1309
            result.emplace_back(k);
498✔
1310
        }
498✔
1311
    }
180✔
1312
    else {
96✔
1313
        auto it = result.begin();
96✔
1314
        auto keep = it;
96✔
1315
        auto m = keys.begin();
96✔
1316

48✔
1317
        // only keep intersection
48✔
1318
        while (it != result.end() && m != keys.end()) {
354✔
1319
            int64_t int_val = *m;
258✔
1320
            if (it->value < int_val) {
258✔
1321
                it++; // don't keep if match is not in new set
18✔
1322
            }
18✔
1323
            else if (it->value > int_val) {
240✔
1324
                ++m; // ignore new matches
102✔
1325
            }
102✔
1326
            else {
138✔
1327
                // Found both places - make sure it is kept
69✔
1328
                if (keep < it)
138✔
1329
                    *keep = *it;
6✔
1330
                ++keep;
138✔
1331
                ++it;
138✔
1332
                ++m;
138✔
1333
            }
138✔
1334
        }
258✔
1335
        if (keep != result.end()) {
96✔
1336
            result.erase(keep, result.end());
24✔
1337
        }
24✔
1338
    }
96✔
1339
}
276✔
1340

1341
struct FindResWrapper {
1342
    InternalFindResult& res;
1343
    IntegerColumn& indexes;
1344
    size_t n = 0;
1345
    size_t size()
1346
    {
144✔
1347
        return res.end_ndx - res.start_ndx;
144✔
1348
    }
144✔
1349
    auto begin()
1350
    {
228✔
1351
        return indexes.cbegin();
228✔
1352
    }
228✔
1353
    auto end()
1354
    {
384✔
1355
        return indexes.cend();
384✔
1356
    }
384✔
1357
};
1358
} // namespace
1359

1360
void StringIndex::find_all_fulltext(std::vector<ObjKey>& result, StringData value) const
1361
{
360✔
1362
    InternalFindResult res;
360✔
1363
    REALM_ASSERT(result.empty());
360✔
1364

180✔
1365
    auto tokenizer = Tokenizer::get_instance();
360✔
1366
    tokenizer->reset({value.data(), value.size()});
360✔
1367
    auto [includes, excludes] = tokenizer->get_search_tokens();
360✔
1368
    if (includes.empty()) {
360✔
1369
        if (excludes.empty()) {
24✔
1370
            throw InvalidArgument("Missing search token");
6✔
1371
        }
6✔
1372
        result = m_target_column.get_all_keys();
18✔
1373
    }
18✔
1374
    else {
336✔
1375
        for (auto& token : includes) {
414✔
1376
            if (token.back() == '*') {
414✔
1377
                std::set<int64_t> keys;
48✔
1378
                m_array->index_string_find_all_prefix(keys, StringData(token.data(), token.size() - 1));
48✔
1379
                intersect(result, keys);
48✔
1380
            }
48✔
1381
            else {
366✔
1382
                switch (find_all_no_copy(StringData{token}, res)) {
366✔
1383
                    case FindRes_not_found:
12✔
1384
                        result.clear();
12✔
1385
                        break;
12✔
1386
                    case FindRes_column: {
228✔
1387
                        IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
228✔
1388
                        FindResWrapper wrapper{res, indexes};
228✔
1389
                        intersect(result, wrapper);
228✔
1390
                        break;
228✔
1391
                    }
×
1392
                    case FindRes_single:
126✔
1393
                        // merge in single res
63✔
1394
                        if (result.empty()) {
126✔
1395
                            result.emplace_back(res.payload);
96✔
1396
                        }
96✔
1397
                        else {
30✔
1398
                            ObjKey key(res.payload);
30✔
1399
                            auto pos = std::lower_bound(result.begin(), result.end(), key);
30✔
1400
                            if (pos != result.end() && key == *pos) {
30✔
1401
                                result.clear();
30✔
1402
                                result.push_back(key);
30✔
1403
                            }
30✔
1404
                            else {
×
1405
                                result.clear();
×
1406
                            }
×
1407
                        }
30✔
1408
                        break;
126✔
1409
                }
414✔
1410
            }
414✔
1411
            if (result.empty())
414✔
1412
                return;
18✔
1413
        }
414✔
1414
    }
336✔
1415

180✔
1416
    for (auto& token : excludes) {
348✔
1417
        if (token.back() == '*') {
96✔
1418
            throw IllegalOperation("Exclude by prefix is not implemented");
×
1419
        }
×
1420
        if (result.empty())
96✔
1421
            return;
×
1422

48✔
1423
        switch (find_all_no_copy(StringData{token}, res)) {
96✔
1424
            case FindRes_not_found:
✔
1425
                // Nothing to exclude
1426
                break;
×
1427
            case FindRes_column: {
60✔
1428
                IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
60✔
1429

30✔
1430
                auto it = result.begin();
60✔
1431
                auto keep = it;
60✔
1432
                size_t m = res.start_ndx;
60✔
1433
                auto idx_val = indexes.get(m);
60✔
1434

30✔
1435
                while (it != result.end()) {
390✔
1436
                    if (it->value < idx_val) {
330✔
1437
                        // Not found in excludes
78✔
1438
                        if (keep < it)
156✔
1439
                            *keep = *it;
156✔
1440
                        ++keep;
156✔
1441
                        ++it;
156✔
1442
                    }
156✔
1443
                    else {
174✔
1444
                        if (it->value == idx_val) {
174✔
1445
                            // found in excludes - don't keep
75✔
1446
                            ++it;
150✔
1447
                        }
150✔
1448
                        ++m;
174✔
1449
                        idx_val = m < res.end_ndx ? indexes.get(m) : std::numeric_limits<int64_t>::max();
153✔
1450
                    }
174✔
1451
                }
330✔
1452
                if (keep != result.end()) {
60✔
1453
                    result.erase(keep, result.end());
60✔
1454
                }
60✔
1455
                break;
60✔
1456
            }
×
1457
            case FindRes_single: {
36✔
1458
                // exclude single res
18✔
1459
                ObjKey key(res.payload);
36✔
1460
                auto pos = std::lower_bound(result.begin(), result.end(), key);
36✔
1461
                if (pos != result.end() && key == *pos) {
36✔
1462
                    result.erase(pos);
36✔
1463
                }
36✔
1464
                break;
36✔
1465
            }
×
1466
        }
96✔
1467
    }
96✔
1468
}
336✔
1469

1470

1471
void StringIndex::clear()
1472
{
2,940✔
1473
    Array values(m_array->get_alloc());
2,940✔
1474
    get_child(*m_array, 0, values);
2,940✔
1475
    REALM_ASSERT(m_array->size() == values.size() + 1);
2,940✔
1476

813✔
1477
    values.clear();
2,940✔
1478
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
2,940✔
1479

813✔
1480
    size_t size = 1;
2,940✔
1481
    m_array->truncate_and_destroy_children(size); // Don't touch `values` array
2,940✔
1482

813✔
1483
    m_array->set_type(Array::type_HasRefs);
2,940✔
1484
}
2,940✔
1485

1486

1487
void StringIndex::do_delete(ObjKey obj_key, StringData index_data, size_t offset)
1488
{
1,159,188✔
1489
    Allocator& alloc = m_array->get_alloc();
1,159,188✔
1490
    Array values(alloc);
1,159,188✔
1491
    get_child(*m_array, 0, values);
1,159,188✔
1492
    REALM_ASSERT(m_array->size() == values.size() + 1);
1,159,188✔
1493

579,426✔
1494
    // Create 4 byte index key
579,426✔
1495
    key_type key = create_key(index_data, offset);
1,159,188✔
1496

579,426✔
1497
    const size_t pos = values.lower_bound_int(key);
1,159,188✔
1498
    const size_t pos_refs = pos + 1; // first entry in refs points to offsets
1,159,188✔
1499
    REALM_ASSERT(pos != values.size());
1,159,188✔
1500

579,426✔
1501
    if (m_array->is_inner_bptree_node()) {
1,159,188✔
1502
        ref_type ref = m_array->get_as_ref(pos_refs);
4,758✔
1503
        StringIndex node(ref, m_array.get(), pos_refs, m_target_column, alloc);
4,758✔
1504
        node.do_delete(obj_key, index_data, offset);
4,758✔
1505

2,361✔
1506
        // Update the ref
2,361✔
1507
        if (node.is_empty()) {
4,758✔
1508
            values.erase(pos);
×
1509
            m_array->erase(pos_refs);
×
1510
            node.destroy();
×
1511
        }
×
1512
        else {
4,758✔
1513
            key_type max_val = node.get_last_key();
4,758✔
1514
            if (max_val != key_type(values.get(pos)))
4,758✔
1515
                values.set(pos, max_val);
×
1516
        }
4,758✔
1517
    }
4,758✔
1518
    else {
1,154,430✔
1519
        uint64_t ref = m_array->get(pos_refs);
1,154,430✔
1520
        if (ref & 1) {
1,154,430✔
1521
            REALM_ASSERT(int64_t(ref >> 1) == obj_key.value);
572,883✔
1522
            values.erase(pos);
572,883✔
1523
            m_array->erase(pos_refs);
572,883✔
1524
        }
572,883✔
1525
        else {
581,547✔
1526
            // A real ref either points to a list or a subindex
290,742✔
1527
            char* header = alloc.translate(ref_type(ref));
581,547✔
1528
            if (Array::get_context_flag_from_header(header)) {
581,547✔
1529
                StringIndex subindex(ref_type(ref), m_array.get(), pos_refs, m_target_column, alloc);
236,913✔
1530
                subindex.do_delete(obj_key, index_data, offset + s_index_key_length);
236,913✔
1531

118,593✔
1532
                if (subindex.is_empty()) {
236,913✔
1533
                    values.erase(pos);
13,635✔
1534
                    m_array->erase(pos_refs);
13,635✔
1535
                    subindex.destroy();
13,635✔
1536
                }
13,635✔
1537
            }
236,913✔
1538
            else {
344,634✔
1539
                IntegerColumn sub(alloc, ref_type(ref)); // Throws
344,634✔
1540
                sub.set_parent(m_array.get(), pos_refs);
344,634✔
1541
                size_t r = sub.find_first(obj_key.value);
344,634✔
1542
                size_t sub_size = sub.size(); // Slow
344,634✔
1543
                REALM_ASSERT_EX(r != sub_size, r, sub_size);
344,634✔
1544
                sub.erase(r);
344,634✔
1545

172,149✔
1546
                if (sub_size == 1) {
344,634✔
1547
                    values.erase(pos);
3,351✔
1548
                    m_array->erase(pos_refs);
3,351✔
1549
                    sub.destroy();
3,351✔
1550
                }
3,351✔
1551
            }
344,634✔
1552
        }
581,547✔
1553
    }
1,154,430✔
1554
}
1,159,188✔
1555

1556
void StringIndex::erase_string(ObjKey key, StringData value)
1557
{
917,583✔
1558
    do_delete(key, value, 0);
917,583✔
1559

458,517✔
1560
    // Collapse top nodes with single item
458,517✔
1561
    while (m_array->is_inner_bptree_node()) {
917,583✔
1562
        REALM_ASSERT(m_array->size() > 1); // node cannot be empty
4,758✔
1563
        if (m_array->size() > 2)
4,758✔
1564
            break;
4,758✔
1565

1566
        ref_type ref = m_array->get_as_ref(1);
×
1567
        m_array->set(1, 1); // avoid destruction of the extracted ref
×
1568
        m_array->destroy_deep();
×
1569
        m_array->init_from_ref(ref);
×
1570
        m_array->update_parent();
×
1571
    }
×
1572
}
917,583✔
1573

1574
namespace {
1575

1576
bool has_duplicate_values(const Array& node, const ClusterColumn& target_col) noexcept
1577
{
6,366✔
1578
    Allocator& alloc = node.get_alloc();
6,366✔
1579
    Array child(alloc);
6,366✔
1580
    size_t n = node.size();
6,366✔
1581
    REALM_ASSERT(n >= 1);
6,366✔
1582
    if (node.is_inner_bptree_node()) {
6,366✔
1583
        // Inner node
1584
        for (size_t i = 1; i < n; ++i) {
×
1585
            ref_type ref = node.get_as_ref(i);
×
1586
            child.init_from_ref(ref);
×
1587
            if (has_duplicate_values(child, target_col))
×
1588
                return true;
×
1589
        }
×
1590
        return false;
×
1591
    }
6,366✔
1592

3,183✔
1593
    // Leaf node
3,183✔
1594
    for (size_t i = 1; i < n; ++i) {
32,850✔
1595
        int_fast64_t value = node.get(i);
28,740✔
1596
        bool is_single_row_index = (value & 1) != 0;
28,740✔
1597
        if (is_single_row_index)
28,740✔
1598
            continue;
23,280✔
1599

2,730✔
1600
        ref_type ref = to_ref(value);
5,460✔
1601
        child.init_from_ref(ref);
5,460✔
1602

2,730✔
1603
        bool is_subindex = child.get_context_flag();
5,460✔
1604
        if (is_subindex) {
5,460✔
1605
            if (has_duplicate_values(child, target_col))
4,872✔
1606
                return true;
1,800✔
1607
            continue;
3,072✔
1608
        }
3,072✔
1609

294✔
1610
        // Child is root of B+-tree of row indexes
294✔
1611
        IntegerColumn sub(alloc, ref);
588✔
1612
        if (sub.size() > 1) {
588✔
1613
            ObjKey first_key = ObjKey(sub.get(0));
480✔
1614
            ObjKey last_key = ObjKey(sub.back());
480✔
1615
            Mixed first = target_col.get_value(first_key);
480✔
1616
            Mixed last = target_col.get_value(last_key);
480✔
1617
            // Since the list is kept in sorted order, the first and
240✔
1618
            // last values will be the same only if the whole list is
240✔
1619
            // storing duplicate values.
240✔
1620
            if (first == last) {
480✔
1621
                return true;
432✔
1622
            }
432✔
1623
            // There may also be several short lists combined, so we need to
24✔
1624
            // check each of these individually for duplicates.
24✔
1625
            IntegerColumn::const_iterator it = sub.cbegin();
48✔
1626
            IntegerColumn::const_iterator it_end = sub.cend();
48✔
1627
            SortedListComparator slc(target_col);
48✔
1628
            while (it != it_end) {
144✔
1629
                Mixed it_data = target_col.get_value(ObjKey(*it));
120✔
1630
                IntegerColumn::const_iterator next = std::upper_bound(it, it_end, it_data, slc);
120✔
1631
                size_t count_of_value = next - it; // row index subtraction in `sub`
120✔
1632
                if (count_of_value > 1) {
120✔
1633
                    return true;
24✔
1634
                }
24✔
1635
                it = next;
96✔
1636
            }
96✔
1637
        }
48✔
1638
    }
588✔
1639

3,183✔
1640
    return false;
5,238✔
1641
}
6,366✔
1642

1643
} // anonymous namespace
1644

1645

1646
bool StringIndex::has_duplicate_values() const noexcept
1647
{
1,494✔
1648
    return ::has_duplicate_values(*m_array, m_target_column);
1,494✔
1649
}
1,494✔
1650

1651

1652
bool StringIndex::is_empty() const
1653
{
241,824✔
1654
    return m_array->size() == 1; // first entry in refs points to offsets
241,824✔
1655
}
241,824✔
1656

1657
template <>
1658
void StringIndex::insert<StringData>(ObjKey key, StringData value)
1659
{
591,579✔
1660
    StringConversionBuffer buffer;
591,579✔
1661

295,629✔
1662
    if (this->m_target_column.is_fulltext()) {
591,579✔
1663
        auto words = Tokenizer::get_instance()->reset(std::string_view(value)).get_all_tokens();
198✔
1664

99✔
1665
        for (auto& word : words) {
936✔
1666
            Mixed m(word);
936✔
1667
            insert_with_offset(key, m.get_index_data(buffer), m, 0); // Throws
936✔
1668
        }
936✔
1669
    }
198✔
1670
    else {
591,381✔
1671
        Mixed m(value);
591,381✔
1672
        insert_with_offset(key, m.get_index_data(buffer), m, 0); // Throws
591,381✔
1673
    }
591,381✔
1674
}
591,579✔
1675

1676
template <>
1677
void StringIndex::set<StringData>(ObjKey key, StringData new_value)
1678
{
439,251✔
1679
    StringConversionBuffer buffer;
439,251✔
1680
    Mixed old_value = get(key);
439,251✔
1681
    Mixed new_value2 = Mixed(new_value);
439,251✔
1682

219,507✔
1683
    if (this->m_target_column.is_fulltext()) {
439,251✔
1684
        auto tokenizer = Tokenizer::get_instance();
168✔
1685
        StringData old_string = old_value.get_index_data(buffer);
168✔
1686
        std::set<std::string> old_words;
168✔
1687

84✔
1688
        if (old_string.size() > 0) {
168✔
1689
            tokenizer->reset({old_string.data(), old_string.size()});
6✔
1690
            old_words = tokenizer->get_all_tokens();
6✔
1691
        }
6✔
1692

84✔
1693
        tokenizer->reset({new_value.data(), new_value.size()});
168✔
1694
        auto new_words = tokenizer->get_all_tokens();
168✔
1695

84✔
1696
        auto w1 = old_words.begin();
168✔
1697
        auto w2 = new_words.begin();
168✔
1698

84✔
1699
        // Do a diff, deleting words no longer present and
84✔
1700
        // inserting new words
84✔
1701
        while (w1 != old_words.end() && w2 != new_words.end()) {
540✔
1702
            if (*w1 < *w2) {
372✔
1703
                erase_string(key, *w1);
126✔
1704
                ++w1;
126✔
1705
            }
126✔
1706
            else if (*w2 < *w1) {
246✔
1707
                Mixed m(*w2);
198✔
1708
                insert_with_offset(key, m.get_index_data(buffer), m, 0);
198✔
1709
                ++w2;
198✔
1710
            }
198✔
1711
            else {
48✔
1712
                ++w1;
48✔
1713
                ++w2;
48✔
1714
            }
48✔
1715
        }
372✔
1716
        while (w1 != old_words.end()) {
168✔
1717
            erase_string(key, *w1);
×
1718
            ++w1;
×
1719
        }
×
1720
        while (w2 != new_words.end()) {
3,354✔
1721
            Mixed m(*w2);
3,186✔
1722
            insert_with_offset(key, m.get_index_data(buffer), m, 0);
3,186✔
1723

1,593✔
1724
            ++w2;
3,186✔
1725
        }
3,186✔
1726
    }
168✔
1727
    else {
439,083✔
1728
        if (REALM_LIKELY(new_value2 != old_value)) {
439,083✔
1729
            // We must erase this row first because erase uses find_first which
216,174✔
1730
            // might find the duplicate if we insert before erasing.
216,174✔
1731
            erase(key); // Throws
432,807✔
1732

216,174✔
1733
            auto index_data = new_value2.get_index_data(buffer);
432,807✔
1734
            insert_with_offset(key, index_data, new_value2, 0); // Throws
432,807✔
1735
        }
432,807✔
1736
    }
439,083✔
1737
}
439,251✔
1738

1739
void StringIndex::node_add_key(ref_type ref)
1740
{
144✔
1741
    REALM_ASSERT(ref);
144✔
1742
    REALM_ASSERT(m_array->is_inner_bptree_node());
144✔
1743

66✔
1744
    Allocator& alloc = m_array->get_alloc();
144✔
1745
    Array offsets(alloc);
144✔
1746
    get_child(*m_array, 0, offsets);
144✔
1747
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
144✔
1748
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE + 1);
144✔
1749

66✔
1750
    Array new_top(alloc);
144✔
1751
    Array new_offsets(alloc);
144✔
1752
    new_top.init_from_ref(ref);
144✔
1753
    new_offsets.init_from_ref(new_top.get_as_ref(0));
144✔
1754
    REALM_ASSERT(!new_offsets.is_empty());
144✔
1755

66✔
1756
    int64_t key = new_offsets.back();
144✔
1757
    offsets.add(key);
144✔
1758
    m_array->add(ref);
144✔
1759
}
144✔
1760

1761
// Must return true if value of object(key) is less than needle.
1762
bool SortedListComparator::operator()(int64_t key_value, Mixed needle) // used in lower_bound
1763
{
20,364,177✔
1764
    Mixed a = m_column.get_value(ObjKey(key_value));
20,364,177✔
1765
    if (a.is_null() && !needle.is_null())
20,364,177✔
1766
        return true;
696✔
1767
    else if (needle.is_null() && !a.is_null())
20,363,481✔
1768
        return false;
18✔
1769
    else if (a.is_null() && needle.is_null())
20,363,463✔
1770
        return false;
358,665✔
1771

10,009,305✔
1772
    if (a == needle)
20,004,798✔
1773
        return false;
20,023,491✔
1774

2,147,483,647✔
1775
    // The Mixed::operator< uses a lexicograpical comparison, therefore we
2,147,483,647✔
1776
    // cannot use our utf8_compare here because we need to be consistent with
2,147,483,647✔
1777
    // using the same compare method as how these strings were they were put
2,147,483,647✔
1778
    // into this ordered column in the first place.
2,147,483,647✔
1779
    return a.compare_signed(needle) < 0;
4,294,967,294✔
1780
}
4,294,967,294✔
1781

1782

1783
// Must return true if value of needle is less than value of object(key).
1784
bool SortedListComparator::operator()(Mixed needle, int64_t key_value) // used in upper_bound
1785
{
18,258,759✔
1786
    Mixed a = m_column.get_value(ObjKey(key_value));
18,258,759✔
1787
    if (needle == a) {
18,298,017✔
1788
        return false;
18,298,017✔
1789
    }
18,298,017✔
1790
    return !(*this)(key_value, needle);
4,294,967,294✔
1791
}
4,294,967,294✔
1792

1793
// LCOV_EXCL_START ignore debug functions
1794

1795

1796
void StringIndex::verify() const
1797
{
1,320✔
1798
#ifdef REALM_DEBUG
1,320✔
1799
    m_array->verify();
1,320✔
1800

660✔
1801
    Allocator& alloc = m_array->get_alloc();
1,320✔
1802
    const size_t array_size = m_array->size();
1,320✔
1803

660✔
1804
    // Get first matching row for every key
660✔
1805
    if (m_array->is_inner_bptree_node()) {
1,320✔
1806
        for (size_t i = 1; i < array_size; ++i) {
×
1807
            size_t ref = m_array->get_as_ref(i);
×
1808
            StringIndex ndx(ref, nullptr, 0, m_target_column, alloc);
×
1809
            ndx.verify();
×
1810
        }
×
1811
    }
×
1812
    else {
1,320✔
1813
        size_t column_size = m_target_column.size();
1,320✔
1814
        for (size_t i = 1; i < array_size; ++i) {
2,736✔
1815
            int64_t ref = m_array->get(i);
1,416✔
1816

708✔
1817
            // low bit set indicate literal ref (shifted)
708✔
1818
            if (ref & 1) {
1,416✔
1819
                size_t r = to_size_t((uint64_t(ref) >> 1));
96✔
1820
                REALM_ASSERT_EX(r < column_size, r, column_size);
96✔
1821
            }
96✔
1822
            else {
1,320✔
1823
                // A real ref either points to a list or a subindex
660✔
1824
                char* header = alloc.translate(to_ref(ref));
1,320✔
1825
                if (Array::get_context_flag_from_header(header)) {
1,320✔
1826
                    StringIndex ndx(to_ref(ref), m_array.get(), i, m_target_column, alloc);
1,296✔
1827
                    ndx.verify();
1,296✔
1828
                }
1,296✔
1829
                else {
24✔
1830
                    IntegerColumn sub(alloc, to_ref(ref)); // Throws
24✔
1831
                    IntegerColumn::const_iterator it = sub.cbegin();
24✔
1832
                    IntegerColumn::const_iterator it_end = sub.cend();
24✔
1833
                    SortedListComparator slc(m_target_column);
24✔
1834
                    Mixed previous = get(ObjKey(*it));
24✔
1835
                    size_t last_row = to_size_t(*it);
24✔
1836

12✔
1837
                    // Check that strings listed in sub are in sorted order
12✔
1838
                    // and if there are duplicates, that the row numbers are
12✔
1839
                    // sorted in the group of duplicates.
12✔
1840
                    while (it != it_end) {
96✔
1841
                        Mixed it_data = get(ObjKey(*it));
72✔
1842
                        size_t it_row = to_size_t(*it);
72✔
1843
                        REALM_ASSERT(previous.compare_signed(it_data) <= 0);
72✔
1844
                        if (it != sub.cbegin() && previous == it_data) {
72✔
1845
                            REALM_ASSERT_EX(it_row > last_row, it_row, last_row);
×
1846
                        }
×
1847
                        last_row = it_row;
72✔
1848
                        previous = get(ObjKey(*it));
72✔
1849
                        ++it;
72✔
1850
                    }
72✔
1851
                }
24✔
1852
            }
1,320✔
1853
        }
1,416✔
1854
    }
1,320✔
1855
// FIXME: Extend verification along the lines of IntegerColumn::verify().
660✔
1856
#endif
1,320✔
1857
}
1,320✔
1858

1859
#ifdef REALM_DEBUG
1860

1861
template <class T>
1862
void StringIndex::verify_entries(const ClusterColumn& column) const
1863
{
1864
    std::vector<ObjKey> results;
1865

1866
    auto it = column.begin();
1867
    auto end = column.end();
1868
    auto col = column.get_column_key();
1869
    while (it != end) {
1870
        ObjKey key = it->get_key();
1871
        T value = it->get<T>(col);
1872

1873
        find_all(results, value);
1874

1875
        auto ndx = find(results.begin(), results.end(), key);
1876
        REALM_ASSERT(ndx != results.end());
1877
        size_t found = count(value);
1878
        REALM_ASSERT_EX(found >= 1, found);
1879
        results.clear();
1880
    }
1881
}
1882

1883
namespace {
1884

1885
void out_hex(std::ostream& out, uint64_t val)
1886
{
×
1887
    if (val) {
×
1888
        out_hex(out, val >> 8);
×
1889
        if (char c = val & 0xFF) {
×
1890
            out << c;
×
1891
        }
×
1892
    }
×
1893
}
×
1894

1895
} // namespace
1896

1897
void StringIndex::dump_node_structure(const Array& node, std::ostream& out, int level)
1898
{
×
1899
    int indent = level * 2;
×
1900
    Allocator& alloc = node.get_alloc();
×
1901
    Array subnode(alloc);
×
1902

1903
    size_t node_size = node.size();
×
1904
    REALM_ASSERT(node_size >= 1);
×
1905

1906
    out << std::hex;
×
1907

1908
    bool node_is_leaf = !node.is_inner_bptree_node();
×
1909
    if (node_is_leaf) {
×
1910
        out << std::setw(indent) << ""
×
1911
            << "Leaf (B+ tree) (ref: " << node.get_ref() << ")\n";
×
1912
    }
×
1913
    else {
×
1914
        out << std::setw(indent) << ""
×
1915
            << "Inner node (B+ tree) (ref: " << node.get_ref() << ")\n";
×
1916
    }
×
1917

1918
    subnode.init_from_ref(to_ref(node.front()));
×
1919
    out << std::setw(indent) << ""
×
1920
        << "  Keys (keys_ref: " << subnode.get_ref() << ", ";
×
1921
    if (subnode.is_empty()) {
×
1922
        out << "no keys";
×
1923
    }
×
1924
    else {
×
1925
        out << "keys: ";
×
1926
        for (size_t i = 0; i != subnode.size(); ++i) {
×
1927
            if (i != 0)
×
1928
                out << ", ";
×
1929
            out_hex(out, subnode.get(i));
×
1930
        }
×
1931
    }
×
1932
    out << ")\n";
×
1933

1934
    if (node_is_leaf) {
×
1935
        for (size_t i = 1; i != node_size; ++i) {
×
1936
            int_fast64_t value = node.get(i);
×
1937
            bool is_single_row_index = (value & 1) != 0;
×
1938
            if (is_single_row_index) {
×
1939
                out << std::setw(indent) << ""
×
1940
                    << "  Single row index (value: " << (value / 2) << ")\n";
×
1941
                continue;
×
1942
            }
×
1943
            subnode.init_from_ref(to_ref(value));
×
1944
            bool is_subindex = subnode.get_context_flag();
×
1945
            if (is_subindex) {
×
1946
                out << std::setw(indent) << ""
×
1947
                    << "  Subindex\n";
×
1948
                dump_node_structure(subnode, out, level + 2);
×
1949
                continue;
×
1950
            }
×
1951
            IntegerColumn indexes(alloc, to_ref(value));
×
1952
            out << std::setw(indent) << ""
×
1953
                << "  List of row indexes\n";
×
1954
            indexes.dump_values(out, level + 2);
×
1955
        }
×
1956
        return;
×
1957
    }
×
1958

1959

1960
    size_t num_children = node_size - 1;
×
1961
    size_t child_ref_begin = 1;
×
1962
    size_t child_ref_end = 1 + num_children;
×
1963
    for (size_t i = child_ref_begin; i != child_ref_end; ++i) {
×
1964
        subnode.init_from_ref(node.get_as_ref(i));
×
1965
        dump_node_structure(subnode, out, level + 1);
×
1966
    }
×
1967
}
×
1968

1969

1970
void StringIndex::do_dump_node_structure(std::ostream& out, int level) const
1971
{
×
1972
    dump_node_structure(*m_array, out, level);
×
1973
}
×
1974

1975
#endif // LCOV_EXCL_STOP ignore debug functions
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc