• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / 1847

18 Nov 2023 02:13PM UTC coverage: 91.712% (+0.02%) from 91.697%
1847

push

Evergreen

web-flow
Index on list of strings (#7142)

92336 of 169214 branches covered (0.0%)

345 of 348 new or added lines in 10 files covered. (99.14%)

80 existing lines in 10 files now uncovered.

231620 of 252552 relevant lines covered (91.71%)

6644424.1 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

88.88
/src/realm/index_string.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include <cstdio>
20
#include <iomanip>
21
#include <list>
22

23
#ifdef REALM_DEBUG
24
#include <iostream>
25
#endif
26

27
#include <realm/exceptions.hpp>
28
#include <realm/index_string.hpp>
29
#include <realm/table.hpp>
30
#include <realm/list.hpp>
31
#include <realm/timestamp.hpp>
32
#include <realm/column_integer.hpp>
33
#include <realm/unicode.hpp>
34
#include <realm/tokenizer.hpp>
35

36
using namespace realm;
37
using namespace realm::util;
38

39
namespace {
40

41
void get_child(Array& parent, size_t child_ref_ndx, Array& child) noexcept
42
{
21,773,358✔
43
    ref_type child_ref = parent.get_as_ref(child_ref_ndx);
21,773,358✔
44
    child.init_from_ref(child_ref);
21,773,358✔
45
    child.set_parent(&parent, child_ref_ndx);
21,773,358✔
46
}
21,773,358✔
47

48
// This method reconstructs the string inserted in the search index based on a string
49
// that matches so far and the last key (only works if complete strings are stored in the index)
50
static StringData reconstruct_string(size_t offset, StringIndex::key_type key, StringData new_string)
51
{
1,140✔
52
    if (key == 0)
1,140✔
53
        return StringData();
×
54

570✔
55
    size_t rest_len = 4;
1,140✔
56
    char* k = reinterpret_cast<char*>(&key);
1,140✔
57
    if (k[0] == 'X')
1,140✔
58
        rest_len = 3;
234✔
59
    else if (k[1] == 'X')
906✔
60
        rest_len = 2;
384✔
61
    else if (k[2] == 'X')
522✔
62
        rest_len = 1;
228✔
63
    else if (k[3] == 'X')
294✔
64
        rest_len = 0;
48✔
65

570✔
66
    REALM_ASSERT(offset + rest_len <= new_string.size());
1,140✔
67

570✔
68
    return StringData(new_string.data(), offset + rest_len);
1,140✔
69
}
1,140✔
70

71

72
} // anonymous namespace
73

74
DataType ClusterColumn::get_data_type() const
75
{
×
76
    const Table* table = m_cluster_tree->get_owning_table();
×
77
    return table->get_column_type(m_column_key);
×
78
}
×
79

80
Mixed ClusterColumn::get_value(ObjKey key) const
81
{
43,532,160✔
82
    const Obj obj{m_cluster_tree->get(key)};
43,532,160✔
83
    return obj.get_any(m_column_key);
43,532,160✔
84
}
43,532,160✔
85

86
Lst<String> ClusterColumn::get_list(ObjKey key) const
87
{
6✔
88
    const Obj obj{m_cluster_tree->get(key)};
6✔
89
    return obj.get_list<String>(m_column_key);
6✔
90
}
6✔
91

92
std::vector<ObjKey> ClusterColumn::get_all_keys() const
93
{
18✔
94
    std::vector<ObjKey> ret;
18✔
95
    ret.reserve(m_cluster_tree->size());
18✔
96
    m_cluster_tree->traverse([&ret](const Cluster* cluster) {
18✔
97
        auto sz = cluster->node_size();
18✔
98
        for (size_t i = 0; i < sz; i++) {
198✔
99
            ret.push_back(cluster->get_real_key(i));
180✔
100
        }
180✔
101

9✔
102
        return IteratorControl::AdvanceToNext;
18✔
103
    });
18✔
104
    return ret;
18✔
105
}
18✔
106

107
template <>
108
int64_t IndexArray::from_list<index_FindFirst>(Mixed value, InternalFindResult& /* result_ref */,
109
                                               const IntegerColumn& key_values, const ClusterColumn& column) const
110
{
5,421✔
111
    SortedListComparator slc(column);
5,421✔
112

2,466✔
113
    IntegerColumn::const_iterator it_end = key_values.cend();
5,421✔
114
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
5,421✔
115
    if (lower == it_end)
5,421✔
116
        return null_key.value;
×
117

2,466✔
118
    int64_t first_key_value = *lower;
5,421✔
119

2,466✔
120
    Mixed actual = column.get_value(ObjKey(first_key_value));
5,421✔
121
    if (actual != value)
5,421✔
122
        return null_key.value;
×
123

2,466✔
124
    return first_key_value;
5,421✔
125
}
5,421✔
126

127
template <>
128
int64_t IndexArray::from_list<index_Count>(Mixed value, InternalFindResult& /* result_ref */,
129
                                           const IntegerColumn& key_values, const ClusterColumn& column) const
130
{
8,250✔
131
    SortedListComparator slc(column);
8,250✔
132

4,125✔
133
    IntegerColumn::const_iterator it_end = key_values.cend();
8,250✔
134
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
8,250✔
135
    if (lower == it_end)
8,250✔
136
        return 0;
24✔
137

4,113✔
138
    int64_t first_key_value = *lower;
8,226✔
139

4,113✔
140
    Mixed actual = column.get_value(ObjKey(first_key_value));
8,226✔
141
    if (actual != value)
8,226✔
142
        return 0;
24✔
143

4,101✔
144
    IntegerColumn::const_iterator upper = std::upper_bound(lower, it_end, value, slc);
8,202✔
145
    int64_t cnt = upper - lower;
8,202✔
146

4,101✔
147
    return cnt;
8,202✔
148
}
8,202✔
149

150
template <>
151
int64_t IndexArray::from_list<index_FindAll_nocopy>(Mixed value, InternalFindResult& result_ref,
152
                                                    const IntegerColumn& key_values,
153
                                                    const ClusterColumn& column) const
154
{
2,118✔
155
    SortedListComparator slc(column);
2,118✔
156
    IntegerColumn::const_iterator it_end = key_values.cend();
2,118✔
157
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
2,118✔
158
    if (lower == it_end)
2,118✔
159
        return size_t(FindRes_not_found);
48✔
160

1,044✔
161
    ObjKey first_key = ObjKey(*lower);
2,070✔
162

1,044✔
163
    Mixed actual = column.get_value(ObjKey(first_key));
2,070✔
164
    if (actual != value)
2,070✔
165
        return size_t(FindRes_not_found);
60✔
166

1,011✔
167
    // Optimization: check the last entry before trying upper bound.
1,011✔
168
    IntegerColumn::const_iterator upper = it_end;
2,010✔
169
    --upper;
2,010✔
170
    // Single result if upper matches lower
1,011✔
171
    if (upper == lower) {
2,010✔
172
        result_ref.payload = *lower;
321✔
173
        return size_t(FindRes_single);
321✔
174
    }
321✔
175

843✔
176
    // Check string value at upper, if equal return matches in (lower, upper]
843✔
177
    ObjKey last_key = ObjKey(*upper);
1,689✔
178
    actual = column.get_value(ObjKey(last_key));
1,689✔
179
    if (actual == value) {
1,689✔
180
        result_ref.payload = from_ref(key_values.get_ref());
1,224✔
181
        result_ref.start_ndx = lower.get_position();
1,224✔
182
        result_ref.end_ndx = upper.get_position() + 1; // one past last match
1,224✔
183
        return size_t(FindRes_column);
1,224✔
184
    }
1,224✔
185

231✔
186
    // Last result is not equal, find the upper bound of the range of results.
231✔
187
    // Note that we are passing upper which is cend() - 1 here as we already
231✔
188
    // checked the last item manually.
231✔
189
    upper = std::upper_bound(lower, upper, value, slc);
465✔
190

231✔
191
    result_ref.payload = from_ref(key_values.get_ref());
465✔
192
    result_ref.start_ndx = lower.get_position();
465✔
193
    result_ref.end_ndx = upper.get_position();
465✔
194
    return size_t(FindRes_column);
465✔
195
}
465✔
196

197

198
template <IndexMethod method>
199
int64_t IndexArray::index_string(Mixed value, InternalFindResult& result_ref, const ClusterColumn& column) const
200
{
2,322,726✔
201
    // Return`realm::not_found`, or an index to the (any) match
1,151,877✔
202
    constexpr bool first(method == index_FindFirst);
2,322,726✔
203
    // Return 0, or the number of items that match the specified `value`
1,151,877✔
204
    constexpr bool get_count(method == index_Count);
2,322,726✔
205
    // Same as `index_FindAll` but does not copy matching rows into `column`
1,151,877✔
206
    // returns FindRes_not_found if there are no matches
1,151,877✔
207
    // returns FindRes_single and the row index (literal) in result_ref.payload
1,151,877✔
208
    // or returns FindRes_column and the reference to a column of duplicates in
1,151,877✔
209
    // result_ref.result with the results in the bounds start_ndx, and end_ndx
1,151,877✔
210
    constexpr bool allnocopy(method == index_FindAll_nocopy);
2,322,726✔
211

1,151,877✔
212
    constexpr int64_t local_not_found = allnocopy ? int64_t(FindRes_not_found) : first ? null_key.value : 0;
2,322,726✔
213

1,151,877✔
214
    const char* data = m_data;
2,322,726✔
215
    const char* header;
2,322,726✔
216
    uint_least8_t width = m_width;
2,322,726✔
217
    bool is_inner_node = m_is_inner_bptree_node;
2,322,726✔
218
    typedef StringIndex::key_type key_type;
2,322,726✔
219
    size_t stringoffset = 0;
2,322,726✔
220

1,151,877✔
221
    StringConversionBuffer buffer;
2,322,726✔
222
    StringData index_data = value.get_index_data(buffer);
2,322,726✔
223

1,151,877✔
224
    // Create 4 byte index key
1,151,877✔
225
    key_type key = StringIndex::create_key(index_data, stringoffset);
2,322,726✔
226

1,151,877✔
227
    for (;;) {
2,875,959✔
228
        // Get subnode table
1,424,925✔
229
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
2,875,959✔
230

1,424,925✔
231
        // Find the position matching the key
1,424,925✔
232
        const char* offsets_header = m_alloc.translate(offsets_ref);
2,875,959✔
233
        const char* offsets_data = get_data_from_header(offsets_header);
2,875,959✔
234
        size_t offsets_size = get_size_from_header(offsets_header);
2,875,959✔
235
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
2,875,959✔
236

1,424,925✔
237
        // If key is outside range, we know there can be no match
1,424,925✔
238
        if (pos == offsets_size)
2,875,959✔
239
            return local_not_found;
908,538✔
240

972,426✔
241
        // Get entry under key
972,426✔
242
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
1,967,421✔
243
        uint64_t ref = get_direct(data, width, pos_refs);
1,967,421✔
244

972,426✔
245
        if (is_inner_node) {
1,967,421✔
246
            // Set vars for next iteration
53,544✔
247
            header = m_alloc.translate(to_ref(ref));
116,040✔
248
            data = get_data_from_header(header);
116,040✔
249
            width = get_width_from_header(header);
116,040✔
250
            is_inner_node = get_is_inner_bptree_node_from_header(header);
116,040✔
251
            continue;
116,040✔
252
        }
116,040✔
253

918,882✔
254
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
1,851,381✔
255

918,882✔
256
        if (stored_key != key)
1,851,381✔
257
            return local_not_found;
338,163✔
258

757,341✔
259
        // Literal row index (tagged)
757,341✔
260
        if (ref & 1) {
1,513,218✔
261
            int64_t key_value = int64_t(ref >> 1);
1,060,071✔
262

530,064✔
263
            Mixed a = column.full_word() ? reconstruct_string(stringoffset, key, index_data)
530,148✔
264
                                         : column.get_value(ObjKey(key_value));
1,059,987✔
265
            if (a == value) {
1,060,071✔
266
                result_ref.payload = key_value;
1,057,467✔
267
                return first ? key_value : get_count ? 1 : FindRes_single;
1,050,159✔
268
            }
1,057,467✔
269
            return local_not_found;
2,604✔
270
        }
2,604✔
271

227,277✔
272
        const char* sub_header = m_alloc.translate(ref_type(ref));
453,147✔
273
        const bool sub_isindex = get_context_flag_from_header(sub_header);
453,147✔
274

227,277✔
275
        // List of row indices with common prefix up to this point, in sorted order.
227,277✔
276
        if (!sub_isindex) {
453,147✔
277
            const IntegerColumn sub(m_alloc, ref_type(ref));
16,077✔
278
            if (column.full_word()) {
16,077✔
279
                result_ref.payload = ref;
288✔
280
                result_ref.start_ndx = 0;
288✔
281
                result_ref.end_ndx = sub.size();
288✔
282
                return FindRes_column;
288✔
283
            }
288✔
284

7,659✔
285
            return from_list<method>(value, result_ref, sub, column);
15,789✔
286
        }
15,789✔
287

219,474✔
288
        // Recurse into sub-index;
219,474✔
289
        header = sub_header;
437,070✔
290
        data = get_data_from_header(header);
437,070✔
291
        width = get_width_from_header(header);
437,070✔
292
        is_inner_node = get_is_inner_bptree_node_from_header(header);
437,070✔
293

219,474✔
294
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
219,474✔
295
        stringoffset += 4;
437,070✔
296

219,474✔
297
        // Update 4 byte index key
219,474✔
298
        key = StringIndex::create_key(index_data, stringoffset);
437,070✔
299
    }
437,070✔
300
}
2,322,726✔
301

302

303
void IndexArray::from_list_all_ins(StringData upper_value, std::vector<ObjKey>& result, const IntegerColumn& rows,
304
                                   const ClusterColumn& column) const
305
{
1,422✔
306
    // optimization for the most common case, where all the strings under a given subindex are equal
711✔
307
    Mixed first = column.get_value(ObjKey(*rows.cbegin()));
1,422✔
308
    Mixed last = column.get_value(ObjKey(*(rows.cend() - 1)));
1,422✔
309
    if (first == last) {
1,422✔
310
        if (!first.is_type(type_String))
102✔
311
            return;
×
312
        auto first_str_upper = case_map(first.get_string(), true);
102✔
313
        if (first_str_upper != upper_value) {
102✔
314
            return;
24✔
315
        }
24✔
316

39✔
317
        size_t sz = result.size() + rows.size();
78✔
318
        result.reserve(sz);
78✔
319
        for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
246✔
320
            result.push_back(ObjKey(*it));
168✔
321
        }
168✔
322
        return;
78✔
323
    }
78✔
324

660✔
325
    // special case for very long strings, where they might have a common prefix and end up in the
660✔
326
    // same subindex column, but still not be identical
660✔
327
    for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
66,792✔
328
        ObjKey key = ObjKey(*it);
65,472✔
329
        Mixed val = column.get_value(key);
65,472✔
330
        if (val.is_type(type_String)) {
65,472✔
331
            auto upper_str = case_map(val.get_string(), true);
65,472✔
332
            if (upper_str == upper_value) {
65,472✔
333
                result.push_back(key);
1,440✔
334
            }
1,440✔
335
        }
65,472✔
336
    }
65,472✔
337

660✔
338
    return;
1,320✔
339
}
1,320✔
340

341

342
void IndexArray::from_list_all(Mixed value, std::vector<ObjKey>& result, const IntegerColumn& rows,
343
                               const ClusterColumn& column) const
344
{
37,521✔
345
    if (column.full_word()) {
37,521✔
346
        result.reserve(rows.size());
60✔
347
        for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
186✔
348
            result.push_back(ObjKey(*it));
126✔
349
        }
126✔
350

30✔
351
        return;
60✔
352
    }
60✔
353

18,225✔
354
    SortedListComparator slc(column);
37,461✔
355

18,225✔
356
    IntegerColumn::const_iterator it_end = rows.cend();
37,461✔
357
    IntegerColumn::const_iterator lower = std::lower_bound(rows.cbegin(), it_end, value, slc);
37,461✔
358
    if (lower == it_end)
37,461✔
359
        return;
×
360

18,225✔
361
    ObjKey key = ObjKey(*lower);
37,461✔
362

18,225✔
363
    Mixed a = column.get_value(key);
37,461✔
364
    if (a != value)
37,461✔
365
        return;
×
366

18,225✔
367
    IntegerColumn::const_iterator upper = std::upper_bound(lower, it_end, value, slc);
37,461✔
368

18,225✔
369
    // Copy all matches into result column
18,225✔
370
    size_t sz = result.size() + (upper - lower);
37,461✔
371
    result.reserve(sz);
37,461✔
372
    for (IntegerColumn::const_iterator it = lower; it != upper; ++it) {
722,949✔
373
        result.push_back(ObjKey(*it));
685,488✔
374
    }
685,488✔
375
}
37,461✔
376

377

378
namespace {
379

380
// Helper functions for SearchList (index_string_all_ins) for generating permutations of index keys
381

382
// replicates the 4 least significant bits each times 8
383
// eg: abcd -> aaaaaaaabbbbbbbbccccccccdddddddd
384
uint32_t replicate_4_lsb_x8(uint32_t i)
385
{
3,117,984✔
386
    REALM_ASSERT_DEBUG(i <= 15);
3,117,984✔
387
    i *= 0x204081;
3,117,984✔
388
    i &= 0x1010101;
3,117,984✔
389
    i *= 0xff;
3,117,984✔
390
    return i;
3,117,984✔
391
}
3,117,984✔
392

393
int32_t select_from_mask(int32_t a, int32_t b, int32_t mask) {
3,118,182✔
394
    return a ^ ((a ^ b) & mask);
3,118,182✔
395
}
3,118,182✔
396

397
// Given upper and lower keys: "ABCD" and "abcd", the 4 LSBs in the permutation argument determine the
398
// final key:
399
// Permutation 0  = "ABCD"
400
// Permutation 1  = "ABCd"
401
// Permutation 8  = "aBCD"
402
// Permutation 15 = "abcd"
403
using key_type = StringIndex::key_type;
404
key_type generate_key(key_type upper, key_type lower, int permutation) {
3,117,858✔
405
    return select_from_mask(upper, lower, replicate_4_lsb_x8(permutation));
3,117,858✔
406
}
3,117,858✔
407

408

409
// Helper structure for IndexArray::index_string_all_ins to generate and keep track of search key permutations,
410
// when traversing the trees.
411
struct SearchList {
412
    struct Item {
413
        const char* header;
414
        size_t string_offset;
415
        key_type key;
416
    };
417

418
    SearchList(const util::Optional<std::string>& upper_value, const util::Optional<std::string>& lower_value)
419
        : m_upper_value(upper_value)
420
        , m_lower_value(lower_value)
421
    {
6,576✔
422
        m_keys_seen.reserve(num_permutations);
6,576✔
423
    }
6,576✔
424

425
    // Add all unique keys for this level to the internal work stack
426
    void add_all_for_level(const char* header, size_t string_offset)
427
    {
195,096✔
428
        m_keys_seen.clear();
195,096✔
429
        const key_type upper_key = StringIndex::create_key(m_upper_value, string_offset);
195,096✔
430
        const key_type lower_key = StringIndex::create_key(m_lower_value, string_offset);
195,096✔
431
        for (int p = 0; p < num_permutations; ++p) {
3,313,518✔
432
            // FIXME: This might still be incorrect due to multi-byte unicode characters (crossing the 4 byte key
1,556,646✔
433
            // size) being combined incorrectly.
1,556,646✔
434
            const key_type key = generate_key(upper_key, lower_key, p);
3,118,422✔
435
            const bool new_key = std::find(m_keys_seen.cbegin(), m_keys_seen.cend(), key) == m_keys_seen.cend();
3,118,422✔
436
            if (new_key) {
3,118,422✔
437
                m_keys_seen.push_back(key);
3,054,099✔
438
                add_next(header, string_offset, key);
3,054,099✔
439
            }
3,054,099✔
440
        }
3,118,422✔
441
    }
195,096✔
442

443
    bool empty() const
444
    {
3,039,774✔
445
        return m_items.empty();
3,039,774✔
446
    }
3,039,774✔
447

448
    Item get_next()
449
    {
3,042,285✔
450
        Item item = m_items.back();
3,042,285✔
451
        m_items.pop_back();
3,042,285✔
452
        return item;
3,042,285✔
453
    }
3,042,285✔
454

455
    // Add a single entry to the internal work stack. Used to traverse the inner trees (same key)
456
    void add_next(const char* header, size_t string_offset, key_type key)
457
    {
3,047,142✔
458
        m_items.push_back({header, string_offset, key});
3,047,142✔
459
    }
3,047,142✔
460

461
private:
462
    static constexpr int num_permutations = 1 << sizeof(key_type); // 4 bytes gives up to 16 search keys
463

464
    std::vector<Item> m_items;
465

466
    const util::Optional<std::string> m_upper_value;
467
    const util::Optional<std::string> m_lower_value;
468

469
    std::vector<key_type> m_keys_seen;
470
};
471

472

473
} // namespace
474

475

476
void IndexArray::index_string_all_ins(StringData value, std::vector<ObjKey>& result,
477
                                      const ClusterColumn& column) const
478
{
6,576✔
479
    REALM_ASSERT(!value.is_null());
6,576✔
480

3,288✔
481
    const util::Optional<std::string> upper_value = case_map(value, true);
6,576✔
482
    const util::Optional<std::string> lower_value = case_map(value, false);
6,576✔
483
    SearchList search_list(upper_value, lower_value);
6,576✔
484

3,288✔
485
    const char* top_header = get_header_from_data(m_data);
6,576✔
486
    search_list.add_all_for_level(top_header, 0);
6,576✔
487

3,288✔
488
    while (!search_list.empty()) {
3,049,164✔
489
        SearchList::Item item = search_list.get_next();
3,042,588✔
490

1,512,807✔
491
        const char* const header = item.header;
3,042,588✔
492
        const size_t string_offset = item.string_offset;
3,042,588✔
493
        const key_type key = item.key;
3,042,588✔
494
        const char* const data = get_data_from_header(header);
3,042,588✔
495
        const uint_least8_t width = get_width_from_header(header);
3,042,588✔
496
        const bool is_inner_node = get_is_inner_bptree_node_from_header(header);
3,042,588✔
497

1,512,807✔
498
        // Get subnode table
1,512,807✔
499
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
3,042,588✔
500

1,512,807✔
501
        // Find the position matching the key
1,512,807✔
502
        const char* const offsets_header = m_alloc.translate(offsets_ref);
3,042,588✔
503
        const char* const offsets_data = get_data_from_header(offsets_header);
3,042,588✔
504
        const size_t offsets_size = get_size_from_header(offsets_header);
3,042,588✔
505
        const size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
3,042,588✔
506

1,512,807✔
507
        // If key is outside range, we know there can be no match
1,512,807✔
508
        if (pos == offsets_size)
3,042,588✔
509
            continue;
810✔
510

1,512,402✔
511
        // Get entry under key
1,512,402✔
512
        const size_t pos_refs = pos + 1; // first entry in refs points to offsets
3,041,778✔
513
        const uint64_t ref = get_direct(data, width, pos_refs);
3,041,778✔
514

1,512,402✔
515
        if (is_inner_node) {
3,041,778✔
516
            // Set vars for next iteration
517
            const char* const inner_header = m_alloc.translate(to_ref(ref));
×
518
            search_list.add_next(inner_header, string_offset, key);
×
519
            continue;
×
520
        }
×
521

1,512,402✔
522
        const key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
3,041,778✔
523

1,512,402✔
524
        if (stored_key != key)
3,041,778✔
525
            continue;
2,837,343✔
526

106,581✔
527
        // Literal row index (tagged)
106,581✔
528
        if (ref & 1) {
204,435✔
529
            ObjKey k(int64_t(ref >> 1));
5,640✔
530

2,820✔
531
            // The buffer is needed when for when this is an integer index.
2,820✔
532
            StringConversionBuffer buffer;
5,640✔
533
            const StringData str = column.get_value(k).get_index_data(buffer);
5,640✔
534
            const util::Optional<std::string> upper_str = case_map(str, true);
5,640✔
535
            if (upper_str == upper_value) {
5,640✔
536
                result.push_back(k);
5,592✔
537
            }
5,592✔
538
            continue;
5,640✔
539
        }
5,640✔
540

103,761✔
541
        const char* const sub_header = m_alloc.translate(ref_type(ref));
198,795✔
542
        const bool sub_isindex = get_context_flag_from_header(sub_header);
198,795✔
543

103,761✔
544
        // List of row indices with common prefix up to this point, in sorted order.
103,761✔
545
        if (!sub_isindex) {
198,795✔
546
            const IntegerColumn sub(m_alloc, ref_type(ref));
1,422✔
547
            from_list_all_ins(upper_value, result, sub, column);
1,422✔
548
            continue;
1,422✔
549
        }
1,422✔
550

103,050✔
551
        // Recurse into sub-index;
103,050✔
552
        search_list.add_all_for_level(sub_header, string_offset + 4);
197,373✔
553
    }
197,373✔
554

3,288✔
555
    // sort the result and return a std::vector
3,288✔
556
    std::sort(result.begin(), result.end());
6,576✔
557
}
6,576✔
558

559

560
void IndexArray::index_string_all(Mixed value, std::vector<ObjKey>& result, const ClusterColumn& column) const
561
{
46,758✔
562
    const char* data = m_data;
46,758✔
563
    const char* header;
46,758✔
564
    uint_least8_t width = m_width;
46,758✔
565
    bool is_inner_node = m_is_inner_bptree_node;
46,758✔
566
    size_t stringoffset = 0;
46,758✔
567

22,998✔
568
    StringConversionBuffer buffer;
46,758✔
569
    StringData index_data = value.get_index_data(buffer);
46,758✔
570
    // Create 4 byte index key
22,998✔
571
    key_type key = StringIndex::create_key(index_data, stringoffset);
46,758✔
572

22,998✔
573
    for (;;) {
76,704✔
574
        // Get subnode table
37,656✔
575
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
76,704✔
576

37,656✔
577
        // Find the position matching the key
37,656✔
578
        const char* offsets_header = m_alloc.translate(offsets_ref);
76,704✔
579
        const char* offsets_data = get_data_from_header(offsets_header);
76,704✔
580
        size_t offsets_size = get_size_from_header(offsets_header);
76,704✔
581
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
76,704✔
582

37,656✔
583
        // If key is outside range, we know there can be no match
37,656✔
584
        if (pos == offsets_size)
76,704✔
585
            return;
12✔
586

37,650✔
587
        // Get entry under key
37,650✔
588
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
76,692✔
589
        uint64_t ref = get_direct(data, width, pos_refs);
76,692✔
590

37,650✔
591
        if (is_inner_node) {
76,692✔
592
            // Set vars for next iteration
3✔
593
            header = m_alloc.translate(ref_type(ref));
6✔
594
            data = get_data_from_header(header);
6✔
595
            width = get_width_from_header(header);
6✔
596
            is_inner_node = get_is_inner_bptree_node_from_header(header);
6✔
597
            continue;
6✔
598
        }
6✔
599

37,647✔
600
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
76,686✔
601

37,647✔
602
        if (stored_key != key)
76,686✔
603
            return;
6,126✔
604

34,584✔
605
        // Literal row index (tagged)
34,584✔
606
        if (ref & 1) {
70,560✔
607
            ObjKey k(int64_t(ref >> 1));
3,099✔
608

1,674✔
609
            if (column.full_word() || column.get_value(k) == value) {
3,099✔
610
                result.push_back(k);
3,099✔
611
                return;
3,099✔
612
            }
3,099✔
613
            return;
×
614
        }
×
615

32,910✔
616
        const char* sub_header = m_alloc.translate(ref_type(ref));
67,461✔
617
        const bool sub_isindex = get_context_flag_from_header(sub_header);
67,461✔
618

32,910✔
619
        // List of row indices with common prefix up to this point, in sorted order.
32,910✔
620
        if (!sub_isindex) {
67,461✔
621
            const IntegerColumn sub(m_alloc, ref_type(ref));
37,521✔
622
            return from_list_all(value, result, sub, column);
37,521✔
623
        }
37,521✔
624

14,655✔
625
        // Recurse into sub-index;
14,655✔
626
        header = sub_header;
29,940✔
627
        data = get_data_from_header(header);
29,940✔
628
        width = get_width_from_header(header);
29,940✔
629
        is_inner_node = get_is_inner_bptree_node_from_header(header);
29,940✔
630

14,655✔
631
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
14,655✔
632
        stringoffset += 4;
29,940✔
633

14,655✔
634
        // Update 4 byte index key
14,655✔
635
        key = StringIndex::create_key(index_data, stringoffset);
29,940✔
636
    }
29,940✔
637
}
46,758✔
638

639
static void get_all_keys_below(std::set<int64_t>& result, ref_type ref, Allocator& alloc)
640
{
72✔
641
    const char* sub_header = alloc.translate(ref_type(ref));
72✔
642
    const bool sub_isindex = NodeHeader::get_context_flag_from_header(sub_header);
72✔
643

36✔
644
    if (sub_isindex) {
72✔
645
        Array tree(alloc);
36✔
646
        tree.init_from_ref(ref);
36✔
647
        auto sz = tree.size();
36✔
648
        for (size_t n = 1; n < sz; n++) {
90✔
649
            auto rot = tree.get_as_ref_or_tagged(n);
54✔
650
            // Literal row index (tagged)
27✔
651
            if (rot.is_tagged()) {
54✔
652
                result.insert(rot.get_as_int());
30✔
653
            }
30✔
654
            else {
24✔
655
                get_all_keys_below(result, rot.get_as_ref(), alloc);
24✔
656
            }
24✔
657
        }
54✔
658
    }
36✔
659
    else {
36✔
660
        IntegerColumn tree(alloc, ref);
36✔
661
        tree.for_all([&result](int64_t i) {
84✔
662
            result.insert(i);
84✔
663
        });
84✔
664
    }
36✔
665
}
72✔
666

667
void IndexArray::_index_string_find_all_prefix(std::set<int64_t>& result, StringData str, const char* header) const
668
{
48✔
669
    size_t stringoffset = 0;
48✔
670

24✔
671
    for (;;) {
72✔
672
        const char* data = NodeHeader::get_data_from_header(header);
72✔
673
        uint_least8_t width = get_width_from_header(header);
72✔
674

36✔
675
        // Create 4 byte lower and upper key
36✔
676
        key_type lower = 0;
72✔
677
        size_t i = 0;
72✔
678
        size_t n = str.size() - stringoffset;
72✔
679
        bool is_at_string_end = (n <= 4);
72✔
680
        if (!is_at_string_end) {
72✔
681
            n = 4;
24✔
682
        }
24✔
683
        while (i < n) {
282✔
684
            lower <<= 8;
210✔
685
            lower += str[stringoffset + i++];
210✔
686
        }
210✔
687
        size_t shift = (4 - i) * 8;
72✔
688
        key_type upper = lower + 1;
72✔
689
        lower <<= shift;
72✔
690
        upper <<= shift;
72✔
691
        upper--;
72✔
692

36✔
693
        // Get index array
36✔
694
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
72✔
695

36✔
696
        // Find the position matching the key
36✔
697
        const char* offsets_header = m_alloc.translate(offsets_ref);
72✔
698
        const char* offsets_data = get_data_from_header(offsets_header);
72✔
699
        size_t offsets_size = get_size_from_header(offsets_header);
72✔
700
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, lower); // keys are always 32 bits wide
72✔
701
        // If key is outside range, we know there can be no match
36✔
702
        if (pos == offsets_size)
72✔
703
            return;
×
704

36✔
705
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
72✔
706

36✔
707
        if (NodeHeader::get_is_inner_bptree_node_from_header(header)) {
72✔
708
            bool done = false;
×
709
            while (!done) {
×
710
                // Recursively call with child node
711
                const char* header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs++)));
×
712
                _index_string_find_all_prefix(result, str, header);
×
713

714
                // Check if current node is past end of key range or last node
715
                auto key = get_direct<32>(offsets_data, pos++);
×
716
                done = key > upper || pos == offsets_size;
×
717
            }
×
718
            return;
×
719
        }
×
720

36✔
721
        size_t end = ::upper_bound<32>(offsets_data, offsets_size, upper); // keys are always 32 bits wide
72✔
722
        if (is_at_string_end) {
72✔
723
            // now get all entries from start to end
24✔
724
            for (size_t ndx = pos_refs; ndx <= end; ndx++) {
108✔
725
                uint64_t ref = get_direct(data, width, ndx);
60✔
726
                // Literal row index (tagged)
30✔
727
                if (ref & 1) {
60✔
728
                    result.emplace(int64_t(ref >> 1));
12✔
729
                }
12✔
730
                else {
48✔
731
                    get_all_keys_below(result, to_ref(ref), m_alloc);
48✔
732
                }
48✔
733
            }
60✔
734
            return;
48✔
735
        }
48✔
736

12✔
737
        // When we are not at end of string then we are comparing against the whole key
12✔
738
        // and we can have at most one match
12✔
739
        REALM_ASSERT(end == pos + 1);
24✔
740
        header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs)));
24✔
741
        stringoffset += 4;
24✔
742
    }
24✔
743
}
48✔
744

745
ObjKey IndexArray::index_string_find_first(Mixed value, const ClusterColumn& column) const
746
{
1,654,935✔
747
    InternalFindResult unused;
1,654,935✔
748
    return ObjKey(index_string<index_FindFirst>(value, unused, column));
1,654,935✔
749
}
1,654,935✔
750

751

752
void IndexArray::index_string_find_all(std::vector<ObjKey>& result, Mixed value, const ClusterColumn& column,
753
                                       bool case_insensitive) const
754
{
53,334✔
755
    if (case_insensitive && value.is_type(type_String)) {
53,334✔
756
        index_string_all_ins(value.get_string(), result, column);
6,576✔
757
    }
6,576✔
758
    else {
46,758✔
759
        index_string_all(value, result, column);
46,758✔
760
    }
46,758✔
761
}
53,334✔
762

763
FindRes IndexArray::index_string_find_all_no_copy(Mixed value, const ClusterColumn& column,
764
                                                  InternalFindResult& result) const
765
{
654,900✔
766
    return static_cast<FindRes>(index_string<index_FindAll_nocopy>(value, result, column));
654,900✔
767
}
654,900✔
768

769
size_t IndexArray::index_string_count(Mixed value, const ClusterColumn& column) const
770
{
12,864✔
771
    InternalFindResult unused;
12,864✔
772
    return to_size_t(index_string<index_Count>(value, unused, column));
12,864✔
773
}
12,864✔
774

775
std::unique_ptr<IndexArray> StringIndex::create_node(Allocator& alloc, bool is_leaf)
776
{
720,930✔
777
    Array::Type type = is_leaf ? Array::type_HasRefs : Array::type_InnerBptreeNode;
720,891✔
778
    std::unique_ptr<IndexArray> top(new IndexArray(alloc)); // Throws
720,930✔
779
    top->create(type);                                      // Throws
720,930✔
780

360,597✔
781
    // Mark that this is part of index
360,597✔
782
    // (as opposed to columns under leaves)
360,597✔
783
    top->set_context_flag(true);
720,930✔
784

360,597✔
785
    // Add subcolumns for leaves
360,597✔
786
    Array values(alloc);
720,930✔
787
    values.create(Array::type_Normal);       // Throws
720,930✔
788
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
720,930✔
789
    top->add(values.get_ref());              // first entry in refs points to offsets
720,930✔
790

360,597✔
791
    return top;
720,930✔
792
}
720,930✔
793

794
void StringIndex::set_target(const ClusterColumn& target_column) noexcept
795
{
×
796
    m_target_column = target_column;
×
797
}
×
798

799

800
StringIndex::key_type StringIndex::get_last_key() const
801
{
278,823✔
802
    Array offsets(m_array->get_alloc());
278,823✔
803
    get_child(*m_array, 0, offsets);
278,823✔
804
    return key_type(offsets.back());
278,823✔
805
}
278,823✔
806

807

808
void StringIndex::insert_with_offset(ObjKey obj_key, StringData index_data, const Mixed& value, size_t offset)
809
{
10,041,858✔
810
    // Create 4 byte index key
5,009,352✔
811
    key_type key = create_key(index_data, offset);
10,041,858✔
812
    TreeInsert(obj_key, key, offset, index_data, value); // Throws
10,041,858✔
813
}
10,041,858✔
814

815
void StringIndex::insert_to_existing_list_at_lower(ObjKey key, Mixed value, IntegerColumn& list,
816
                                                   const IntegerColumnIterator& lower)
817
{
2,073,168✔
818
    // At this point there exists duplicates of this value, we need to
1,035,063✔
819
    // insert value beside it's duplicates so that rows are also sorted
1,035,063✔
820
    // in ascending order.
1,035,063✔
821
    IntegerColumn::const_iterator upper = [&]() {
2,073,228✔
822
        if (m_target_column.full_word()) {
2,073,228✔
823
            return list.cend();
354✔
824
        }
354✔
825
        else {
2,072,874✔
826
            SortedListComparator slc(m_target_column);
2,072,874✔
827
            return std::upper_bound(lower, list.cend(), value, slc);
2,072,874✔
828
        }
2,072,874✔
829
    }();
2,073,228✔
830
    // find insert position (the list has to be kept in sorted order)
1,035,063✔
831
    // In most cases the refs will be added to the end. So we test for that
1,035,063✔
832
    // first to see if we can avoid the binary search for insert position
1,035,063✔
833
    IntegerColumn::const_iterator last = upper - ptrdiff_t(1);
2,073,168✔
834
    int64_t last_key_value = *last;
2,073,168✔
835
    if (key.value >= last_key_value) {
2,073,168✔
836
        list.insert(upper.get_position(), key.value);
2,038,443✔
837
    }
2,038,443✔
838
    else {
34,725✔
839
        IntegerColumn::const_iterator inner_lower = std::lower_bound(lower, upper, key.value);
34,725✔
840
        if (*inner_lower != key.value) {
34,803✔
841
            list.insert(inner_lower.get_position(), key.value);
34,746✔
842
        }
34,746✔
843
    }
34,725✔
844
}
2,073,168✔
845

846
void StringIndex::insert_to_existing_list(ObjKey key, Mixed value, IntegerColumn& list)
847
{
1,935✔
848
    SortedListComparator slc(m_target_column);
1,935✔
849
    IntegerColumn::const_iterator it_end = list.cend();
1,935✔
850
    IntegerColumn::const_iterator lower = std::lower_bound(list.cbegin(), it_end, value, slc);
1,935✔
851

963✔
852
    if (lower == it_end) {
1,935✔
853
        // Not found and everything is less, just append it to the end.
741✔
854
        list.add(key.value);
1,500✔
855
    }
1,500✔
856
    else {
435✔
857
        ObjKey lower_key = ObjKey(*lower);
435✔
858
        Mixed lower_value = get(lower_key);
435✔
859

222✔
860
        if (lower_value != value) {
435✔
861
            list.insert(lower.get_position(), key.value);
435✔
862
        }
435✔
863
        else {
×
864
            // At this point there exists duplicates of this value, we need to
865
            // insert value beside it's duplicates so that rows are also sorted
866
            // in ascending order.
867
            insert_to_existing_list_at_lower(key, value, list, lower);
×
868
        }
×
869
    }
435✔
870
}
1,935✔
871

872

873
void StringIndex::insert_row_list(size_t ref, size_t offset, StringData index_data)
874
{
2,565✔
875
    REALM_ASSERT(!m_array->is_inner_bptree_node()); // only works in leaves
2,565✔
876

1,329✔
877
    // Create 4 byte index key
1,329✔
878
    key_type key = create_key(index_data, offset);
2,565✔
879

1,329✔
880
    // Get subnode table
1,329✔
881
    Allocator& alloc = m_array->get_alloc();
2,565✔
882
    Array values(alloc);
2,565✔
883
    get_child(*m_array, 0, values);
2,565✔
884
    REALM_ASSERT(m_array->size() == values.size() + 1);
2,565✔
885

1,329✔
886
    size_t ins_pos = values.lower_bound_int(key);
2,565✔
887
    if (ins_pos == values.size()) {
2,565✔
888
        // When key is outside current range, we can just add it
1,329✔
889
        values.add(key);
2,565✔
890
        m_array->add(ref);
2,565✔
891
        return;
2,565✔
892
    }
2,565✔
893

894
#ifdef REALM_DEBUG // LCOV_EXCL_START ignore debug code
×
895
    // Since we only use this for moving existing values to new
896
    // subindexes, there should never be an existing match.
897
    key_type k = key_type(values.get(ins_pos));
×
898
    REALM_ASSERT(k != key);
×
899
#endif // LCOV_EXCL_STOP ignore debug code
×
900

901
    // If key is not present we add it at the correct location
902
    values.insert(ins_pos, key);
×
903
    m_array->insert(ins_pos + 1, ref);
×
904
}
×
905

906

907
void StringIndex::TreeInsert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value)
908
{
10,046,961✔
909
    NodeChange nc = do_insert(obj_key, key, offset, index_data, value);
10,046,961✔
910
    switch (nc.type) {
10,046,961✔
911
        case NodeChange::change_None:
10,041,999✔
912
            return;
10,041,999✔
913
        case NodeChange::change_InsertBefore: {
3✔
914
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
3✔
915
            new_node.node_add_key(nc.ref1);
3✔
916
            new_node.node_add_key(get_ref());
3✔
917
            m_array->init_from_ref(new_node.get_ref());
3✔
918
            m_array->update_parent();
3✔
919
            return;
3✔
920
        }
×
921
        case NodeChange::change_InsertAfter: {
9✔
922
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
9✔
923
            new_node.node_add_key(get_ref());
9✔
924
            new_node.node_add_key(nc.ref1);
9✔
925
            m_array->init_from_ref(new_node.get_ref());
9✔
926
            m_array->update_parent();
9✔
927
            return;
9✔
928
        }
×
929
        case NodeChange::change_Split: {
72✔
930
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
72✔
931
            new_node.node_add_key(nc.ref1);
72✔
932
            new_node.node_add_key(nc.ref2);
72✔
933
            m_array->init_from_ref(new_node.get_ref());
72✔
934
            m_array->update_parent();
72✔
935
            return;
72✔
936
        }
×
937
    }
×
938
    REALM_ASSERT(false); // LCOV_EXCL_LINE; internal Realm error
939
}
×
940

941

942
StringIndex::NodeChange StringIndex::do_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data,
943
                                               const Mixed& value)
944
{
10,325,298✔
945
    Allocator& alloc = m_array->get_alloc();
10,325,298✔
946
    if (m_array->is_inner_bptree_node()) {
10,325,298✔
947
        // Get subnode table
127,935✔
948
        Array keys(alloc);
273,618✔
949
        get_child(*m_array, 0, keys);
273,618✔
950
        REALM_ASSERT(m_array->size() == keys.size() + 1);
273,618✔
951

127,935✔
952
        // Find the subnode containing the item
127,935✔
953
        size_t node_ndx = keys.lower_bound_int(key);
273,618✔
954
        if (node_ndx == keys.size()) {
273,618✔
955
            // node can never be empty, so try to fit in last item
654✔
956
            node_ndx = keys.size() - 1;
10,116✔
957
        }
10,116✔
958

127,935✔
959
        // Get sublist
127,935✔
960
        size_t refs_ndx = node_ndx + 1; // first entry in refs points to offsets
273,618✔
961
        ref_type ref = m_array->get_as_ref(refs_ndx);
273,618✔
962
        StringIndex target(ref, m_array.get(), refs_ndx, m_target_column, alloc);
273,618✔
963

127,935✔
964
        // Insert item
127,935✔
965
        NodeChange nc = target.do_insert(obj_key, key, offset, index_data, value);
273,618✔
966
        if (nc.type == NodeChange::change_None) {
273,618✔
967
            // update keys
127,701✔
968
            key_type last_key = target.get_last_key();
273,132✔
969
            keys.set(node_ndx, last_key);
273,132✔
970
            return NodeChange::change_None; // no new nodes
273,132✔
971
        }
273,132✔
972

234✔
973
        if (nc.type == NodeChange::change_InsertAfter) {
486✔
974
            ++node_ndx;
9✔
975
            ++refs_ndx;
9✔
976
        }
9✔
977

234✔
978
        // If there is room, just update node directly
234✔
979
        if (keys.size() < REALM_MAX_BPNODE_SIZE) {
486✔
980
            if (nc.type == NodeChange::change_Split) {
486✔
981
                node_insert_split(node_ndx, nc.ref2);
471✔
982
            }
471✔
983
            else {
15✔
984
                node_insert(node_ndx, nc.ref1); // ::INSERT_BEFORE/AFTER
15✔
985
            }
15✔
986
            return NodeChange::change_None;
486✔
987
        }
486✔
988

989
        // Else create new node
990
        StringIndex new_node(inner_node_tag(), alloc);
×
991
        if (nc.type == NodeChange::change_Split) {
×
992
            // update offset for left node
993
            key_type last_key = target.get_last_key();
×
994
            keys.set(node_ndx, last_key);
×
995

996
            new_node.node_add_key(nc.ref2);
×
997
            ++node_ndx;
×
998
            ++refs_ndx;
×
999
        }
×
1000
        else {
×
1001
            new_node.node_add_key(nc.ref1);
×
1002
        }
×
1003

1004
        switch (node_ndx) {
×
1005
            case 0: // insert before
×
1006
                return NodeChange(NodeChange::change_InsertBefore, new_node.get_ref());
×
1007
            case REALM_MAX_BPNODE_SIZE: // insert after
×
1008
                if (nc.type == NodeChange::change_Split)
×
1009
                    return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
×
1010
                return NodeChange(NodeChange::change_InsertAfter, new_node.get_ref());
×
1011
            default: // split
×
1012
                // Move items after split to new node
1013
                size_t len = m_array->size();
×
1014
                for (size_t i = refs_ndx; i < len; ++i) {
×
1015
                    ref_type ref_i = m_array->get_as_ref(i);
×
1016
                    new_node.node_add_key(ref_i);
×
1017
                }
×
1018
                keys.truncate(node_ndx);
×
1019
                m_array->truncate(refs_ndx);
×
1020
                return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
×
1021
        }
10,051,680✔
1022
    }
10,051,680✔
1023
    else {
10,051,680✔
1024
        // Is there room in the list?
5,013,744✔
1025
        Array old_keys(alloc);
10,051,680✔
1026
        get_child(*m_array, 0, old_keys);
10,051,680✔
1027
        const size_t old_offsets_size = old_keys.size();
10,051,680✔
1028
        REALM_ASSERT_EX(m_array->size() == old_offsets_size + 1, m_array->size(), old_offsets_size + 1);
10,051,680✔
1029

5,013,744✔
1030
        bool noextend = old_offsets_size >= REALM_MAX_BPNODE_SIZE;
10,051,680✔
1031

5,013,744✔
1032
        // See if we can fit entry into current leaf
5,013,744✔
1033
        // Works if there is room or it can join existing entries
5,013,744✔
1034
        if (leaf_insert(obj_key, key, offset, index_data, value, noextend))
10,051,680✔
1035
            return NodeChange::change_None;
10,041,366✔
1036

3,186✔
1037
        // Create new list for item (a leaf)
3,186✔
1038
        StringIndex new_list(m_target_column, alloc);
10,314✔
1039

3,186✔
1040
        new_list.leaf_insert(obj_key, key, offset, index_data, value);
10,314✔
1041

3,186✔
1042
        size_t ndx = old_keys.lower_bound_int(key);
10,314✔
1043

3,186✔
1044
        // insert before
3,186✔
1045
        if (ndx == 0)
10,314✔
1046
            return NodeChange(NodeChange::change_InsertBefore, new_list.get_ref());
9✔
1047

3,183✔
1048
        // insert after
3,183✔
1049
        if (ndx == old_offsets_size)
10,305✔
1050
            return NodeChange(NodeChange::change_InsertAfter, new_list.get_ref());
18✔
1051

3,177✔
1052
        // split
3,177✔
1053
        Array new_keys(alloc);
10,287✔
1054
        get_child(*new_list.m_array, 0, new_keys);
10,287✔
1055
        // Move items after split to new list
3,177✔
1056
        for (size_t i = ndx; i < old_offsets_size; ++i) {
288,078✔
1057
            int64_t v2 = old_keys.get(i);
277,791✔
1058
            int64_t v3 = m_array->get(i + 1);
277,791✔
1059

140,169✔
1060
            new_keys.add(v2);
277,791✔
1061
            new_list.m_array->add(v3);
277,791✔
1062
        }
277,791✔
1063
        old_keys.truncate(ndx);
10,287✔
1064
        m_array->truncate(ndx + 1);
10,287✔
1065

3,177✔
1066
        return NodeChange(NodeChange::change_Split, get_ref(), new_list.get_ref());
10,287✔
1067
    }
10,287✔
1068
}
10,325,298✔
1069

1070

1071
void StringIndex::node_insert_split(size_t ndx, size_t new_ref)
1072
{
471✔
1073
    REALM_ASSERT(m_array->is_inner_bptree_node());
471✔
1074
    REALM_ASSERT(new_ref);
471✔
1075

234✔
1076
    Allocator& alloc = m_array->get_alloc();
471✔
1077
    Array offsets(alloc);
471✔
1078
    get_child(*m_array, 0, offsets);
471✔
1079

234✔
1080
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
471✔
1081
    REALM_ASSERT(ndx < offsets.size());
471✔
1082
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
471✔
1083

234✔
1084
    // Get sublists
234✔
1085
    size_t refs_ndx = ndx + 1; // first entry in refs points to offsets
471✔
1086
    ref_type orig_ref = m_array->get_as_ref(refs_ndx);
471✔
1087
    StringIndex orig_col(orig_ref, m_array.get(), refs_ndx, m_target_column, alloc);
471✔
1088
    StringIndex new_col(new_ref, nullptr, 0, m_target_column, alloc);
471✔
1089

234✔
1090
    // Update original key
234✔
1091
    key_type last_key = orig_col.get_last_key();
471✔
1092
    offsets.set(ndx, last_key);
471✔
1093

234✔
1094
    // Insert new ref
234✔
1095
    key_type new_key = new_col.get_last_key();
471✔
1096
    offsets.insert(ndx + 1, new_key);
471✔
1097
    m_array->insert(ndx + 2, new_ref);
471✔
1098
}
471✔
1099

1100

1101
void StringIndex::node_insert(size_t ndx, size_t ref)
1102
{
15✔
1103
    REALM_ASSERT(ref);
15✔
1104
    REALM_ASSERT(m_array->is_inner_bptree_node());
15✔
1105

1106
    Allocator& alloc = m_array->get_alloc();
15✔
1107
    Array offsets(alloc);
15✔
1108
    get_child(*m_array, 0, offsets);
15✔
1109
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
15✔
1110

1111
    REALM_ASSERT(ndx <= offsets.size());
15✔
1112
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
15✔
1113

1114
    StringIndex col(ref, nullptr, 0, m_target_column, alloc);
15✔
1115
    key_type last_key = col.get_last_key();
15✔
1116

1117
    offsets.insert(ndx, last_key);
15✔
1118
    m_array->insert(ndx + 1, ref);
15✔
1119
}
15✔
1120

1121
bool StringIndex::leaf_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value,
1122
                              bool noextend)
1123
{
10,033,848✔
1124
    REALM_ASSERT(!m_array->is_inner_bptree_node());
10,033,848✔
1125

5,006,175✔
1126
    // Get subnode table
5,006,175✔
1127
    Allocator& alloc = m_array->get_alloc();
10,033,848✔
1128
    Array keys(alloc);
10,033,848✔
1129
    get_child(*m_array, 0, keys);
10,033,848✔
1130
    REALM_ASSERT(m_array->size() == keys.size() + 1);
10,033,848✔
1131

5,006,175✔
1132
    // If we are keeping the complete string in the index
5,006,175✔
1133
    // we want to know if this is the last part
5,006,175✔
1134
    bool is_at_string_end = offset + 4 >= index_data.size();
10,033,848✔
1135

5,006,175✔
1136
    size_t ins_pos = keys.lower_bound_int(key);
10,033,848✔
1137
    size_t ins_pos_refs = ins_pos + 1; // first entry in refs points to offsets
10,033,848✔
1138

5,006,175✔
1139
    if (ins_pos == keys.size()) {
10,033,848✔
1140
        if (noextend)
879,951✔
1141
            return false;
18✔
1142

438,855✔
1143
        // When key is outside current range, we can just add it
438,855✔
1144
        keys.add(key);
879,933✔
1145
        if (!m_target_column.full_word() || is_at_string_end) {
879,933✔
1146
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
518,874✔
1147
            m_array->add(shifted);
518,874✔
1148
        }
518,874✔
1149
        else {
361,059✔
1150
            // create subindex for rest of string
180,561✔
1151
            StringIndex subindex(m_target_column, m_array->get_alloc());
361,059✔
1152
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
361,059✔
1153
            m_array->add(subindex.get_ref());
361,059✔
1154
        }
361,059✔
1155
        return true;
879,933✔
1156
    }
879,933✔
1157

4,567,314✔
1158
    key_type k = key_type(keys.get(ins_pos));
9,153,897✔
1159

4,567,314✔
1160
    // If key is not present we add it at the correct location
4,567,314✔
1161
    if (k != key) {
9,153,897✔
1162
        if (noextend)
953,541✔
1163
            return false;
552✔
1164

468,927✔
1165
        keys.insert(ins_pos, key);
952,989✔
1166
        if (!m_target_column.full_word() || is_at_string_end) {
952,989✔
1167
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
771,714✔
1168
            m_array->insert(ins_pos_refs, shifted);
771,714✔
1169
        }
771,714✔
1170
        else {
181,275✔
1171
            // create subindex for rest of string
90,705✔
1172
            StringIndex subindex(m_target_column, m_array->get_alloc());
181,275✔
1173
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
181,275✔
1174
            m_array->insert(ins_pos_refs, subindex.get_ref());
181,275✔
1175
        }
181,275✔
1176
        return true;
952,989✔
1177
    }
952,989✔
1178

4,098,120✔
1179
    // This leaf already has a slot for for the key
4,098,120✔
1180

4,098,120✔
1181
    uint64_t slot_value = uint64_t(m_array->get(ins_pos_refs));
8,200,356✔
1182
    size_t suboffset = offset + s_index_key_length;
8,200,356✔
1183

4,098,120✔
1184
    // Single match (lowest bit set indicates literal row_ndx)
4,098,120✔
1185
    if ((slot_value & 1) != 0) {
8,200,356✔
1186
        ObjKey obj_key2 = ObjKey(int64_t(slot_value >> 1));
52,005✔
1187
        Mixed v2 = m_target_column.full_word() ? reconstruct_string(offset, key, index_data) : get(obj_key2);
51,702✔
1188
        if (v2 == value) {
52,005✔
1189
            if (obj_key.value != obj_key2.value) {
9,645✔
1190
                // Strings are equal but this is not a list.
4,866✔
1191
                // Create a list and add both rows.
4,866✔
1192

4,866✔
1193
                // convert to list (in sorted order)
4,866✔
1194
                Array row_list(alloc);
9,615✔
1195
                row_list.create(Array::type_Normal); // Throws
9,615✔
1196
                row_list.add(obj_key < obj_key2 ? obj_key.value : obj_key2.value);
8,991✔
1197
                row_list.add(obj_key < obj_key2 ? obj_key2.value : obj_key.value);
8,991✔
1198
                m_array->set(ins_pos_refs, row_list.get_ref());
9,615✔
1199
            }
9,615✔
1200
        }
9,645✔
1201
        else {
42,360✔
1202
            StringConversionBuffer buffer;
42,360✔
1203
            auto index_data_2 = v2.get_index_data(buffer);
42,360✔
1204
            if (index_data == index_data_2 || suboffset > s_max_offset) {
42,360✔
1205
                // These strings have the same prefix up to this point but we
297✔
1206
                // don't want to recurse further, create a list in sorted order.
297✔
1207
                bool row_ndx_first = value.compare_signed(v2) < 0;
573✔
1208
                Array row_list(alloc);
573✔
1209
                row_list.create(Array::type_Normal); // Throws
573✔
1210
                row_list.add(row_ndx_first ? obj_key.value : obj_key2.value);
441✔
1211
                row_list.add(row_ndx_first ? obj_key2.value : obj_key.value);
441✔
1212
                m_array->set(ins_pos_refs, row_list.get_ref());
573✔
1213
            }
573✔
1214
            else {
41,787✔
1215
                // These strings have the same prefix up to this point but they
21,495✔
1216
                // are actually not equal. Extend the tree recursivly until the
21,495✔
1217
                // prefix of these strings is different.
21,495✔
1218
                StringIndex subindex(m_target_column, m_array->get_alloc());
41,787✔
1219
                subindex.insert_with_offset(obj_key2, index_data_2, v2, suboffset);
41,787✔
1220
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
41,787✔
1221
                // Join the string of SubIndices to the current position of m_array
21,495✔
1222
                m_array->set(ins_pos_refs, subindex.get_ref());
41,787✔
1223
            }
41,787✔
1224
        }
42,360✔
1225
        return true;
52,005✔
1226
    }
52,005✔
1227

4,071,447✔
1228
    // If there already is a list of matches, we see if we fit there
4,071,447✔
1229
    // or it has to be split into a subindex
4,071,447✔
1230
    ref_type ref = ref_type(slot_value);
8,148,351✔
1231
    char* header = alloc.translate(ref);
8,148,351✔
1232
    if (!Array::get_context_flag_from_header(header)) {
8,148,351✔
1233
        IntegerColumn sub(alloc, ref); // Throws
2,077,311✔
1234
        sub.set_parent(m_array.get(), ins_pos_refs);
2,077,311✔
1235

1,037,142✔
1236
        IntegerColumn::const_iterator it_end = sub.cend();
2,077,311✔
1237
        IntegerColumn::const_iterator lower = it_end;
2,077,311✔
1238

1,037,142✔
1239
        auto value_exists_in_list = [&]() {
2,077,833✔
1240
            if (m_target_column.full_word()) {
2,077,833✔
1241
                lower = sub.cbegin();
360✔
1242
                return reconstruct_string(offset, key, index_data) == value.get_string();
360✔
1243
            }
360✔
1244
            SortedListComparator slc(m_target_column);
2,077,473✔
1245
            lower = std::lower_bound(sub.cbegin(), it_end, value, slc);
2,077,473✔
1246

1,037,319✔
1247
            if (lower != it_end) {
2,077,473✔
1248
                Mixed lower_value = get(ObjKey(*lower));
2,074,356✔
1249
                if (lower_value == value) {
2,074,356✔
1250
                    return true;
2,072,859✔
1251
                }
2,072,859✔
1252
            }
4,614✔
1253
            return false;
4,614✔
1254
        };
4,614✔
1255

1,037,142✔
1256
        // If we found the value in this list, add the duplicate to the list.
1,037,142✔
1257
        if (value_exists_in_list()) {
2,077,311✔
1258
            insert_to_existing_list_at_lower(obj_key, value, sub, lower);
2,073,192✔
1259
        }
2,073,192✔
1260
        else {
4,119✔
1261
            // If the list only stores duplicates we are free to branch and
2,067✔
1262
            // and create a sub index with this existing list as one of the
2,067✔
1263
            // leafs, but if the list doesn't only contain duplicates we
2,067✔
1264
            // must respect that we store a common key prefix up to this
2,067✔
1265
            // point and insert into the existing list.
2,067✔
1266
            ObjKey key_of_any_dup = ObjKey(sub.get(0));
4,119✔
1267
            StringConversionBuffer buffer;
4,119✔
1268
            StringData index_data_2 = m_target_column.full_word() ? reconstruct_string(offset, key, index_data)
2,070✔
1269
                                                                  : get(key_of_any_dup).get_index_data(buffer);
4,116✔
1270
            if (index_data == index_data_2 || suboffset > s_max_offset) {
4,494✔
1271
                insert_to_existing_list(obj_key, value, sub);
1,935✔
1272
            }
1,935✔
1273
            else {
2,184✔
1274
#ifdef REALM_DEBUG
2,184✔
1275
                bool contains_only_duplicates = true;
2,184✔
1276
                if (!m_target_column.full_word() && sub.size() > 1) {
2,559✔
1277
                    ObjKey first_key = ObjKey(sub.get(0));
1,824✔
1278
                    ObjKey last_key = ObjKey(sub.back());
1,824✔
1279
                    auto first = get(first_key);
1,824✔
1280
                    auto last = get(last_key);
1,824✔
1281
                    // Since the list is kept in sorted order, the first and
972✔
1282
                    // last values will be the same only if the whole list is
972✔
1283
                    // storing duplicate values.
972✔
1284
                    if (first != last) {
1,824✔
1285
                        contains_only_duplicates = false; // LCOV_EXCL_LINE
1286
                    }
×
1287
                }
1,824✔
1288
                REALM_ASSERT_DEBUG(contains_only_duplicates);
2,184✔
1289
#endif
2,184✔
1290
                // The buffer is needed for when this is an integer index.
1,104✔
1291
                StringIndex subindex(m_target_column, m_array->get_alloc());
2,184✔
1292
                subindex.insert_row_list(sub.get_ref(), suboffset, index_data_2);
2,184✔
1293
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
2,184✔
1294
                m_array->set(ins_pos_refs, subindex.get_ref());
2,184✔
1295
            }
2,184✔
1296
        }
4,119✔
1297
        return true;
2,077,311✔
1298
    }
2,077,311✔
1299

3,034,305✔
1300
    // The key matches, but there is a subindex here so go down a level in the tree.
3,034,305✔
1301
    StringIndex subindex(ref, m_array.get(), ins_pos_refs, m_target_column, alloc);
6,071,040✔
1302
    subindex.insert_with_offset(obj_key, index_data, value, suboffset);
6,071,040✔
1303

3,034,305✔
1304
    return true;
6,071,040✔
1305
}
6,071,040✔
1306

1307
Mixed StringIndex::get(ObjKey key) const
1308
{
3,708,561✔
1309
    return m_target_column.get_value(key);
3,708,561✔
1310
}
3,708,561✔
1311

1312
void StringIndex::erase(ObjKey key)
1313
{
913,989✔
1314
    StringConversionBuffer buffer;
913,989✔
1315
    if (m_target_column.full_word()) {
913,989✔
1316
        if (m_target_column.tokenize()) {
78✔
1317
            // This is a full text index
36✔
1318
            auto index_data(get(key).get_index_data(buffer));
72✔
1319
            auto words = Tokenizer::get_instance()->reset(std::string_view(index_data)).get_all_tokens();
72✔
1320
            for (auto& w : words) {
2,460✔
1321
                erase_string(key, w);
2,460✔
1322
            }
2,460✔
1323
        }
72✔
1324
        else {
6✔
1325
            // This is a list (of strings)
3✔
1326
            erase_list(key, m_target_column.get_list(key));
6✔
1327
        }
6✔
1328
    }
78✔
1329
    else {
913,911✔
1330
        erase_string(key, get(key).get_index_data(buffer));
913,911✔
1331
    }
913,911✔
1332
}
913,989✔
1333

1334
void StringIndex::erase_list(ObjKey key, const Lst<String>& list)
1335
{
12✔
1336
    std::vector<StringData> strings;
12✔
1337
    strings.reserve(list.size());
12✔
1338
    for (auto& val : list) {
48✔
1339
        strings.push_back(val);
48✔
1340
    }
48✔
1341

6✔
1342
    std::sort(strings.begin(), strings.end());
12✔
1343
    auto last = std::unique(strings.begin(), strings.end());
12✔
1344
    for (auto it = strings.begin(); it != last; ++it) {
48✔
1345
        erase_string(key, *it);
36✔
1346
    }
36✔
1347
}
12✔
1348

1349
namespace {
1350
template <typename T>
1351
void intersect(std::vector<ObjKey>& result, T& keys)
1352
{
276✔
1353
    if (result.empty()) {
276✔
1354
        result.reserve(keys.size());
180✔
1355
        for (auto k : keys) {
498✔
1356
            result.emplace_back(k);
498✔
1357
        }
498✔
1358
    }
180✔
1359
    else {
96✔
1360
        auto it = result.begin();
96✔
1361
        auto keep = it;
96✔
1362
        auto m = keys.begin();
96✔
1363

48✔
1364
        // only keep intersection
48✔
1365
        while (it != result.end() && m != keys.end()) {
354✔
1366
            int64_t int_val = *m;
258✔
1367
            if (it->value < int_val) {
258✔
1368
                it++; // don't keep if match is not in new set
18✔
1369
            }
18✔
1370
            else if (it->value > int_val) {
240✔
1371
                ++m; // ignore new matches
102✔
1372
            }
102✔
1373
            else {
138✔
1374
                // Found both places - make sure it is kept
69✔
1375
                if (keep < it)
138✔
1376
                    *keep = *it;
6✔
1377
                ++keep;
138✔
1378
                ++it;
138✔
1379
                ++m;
138✔
1380
            }
138✔
1381
        }
258✔
1382
        if (keep != result.end()) {
96✔
1383
            result.erase(keep, result.end());
24✔
1384
        }
24✔
1385
    }
96✔
1386
}
276✔
1387

1388
struct FindResWrapper {
1389
    InternalFindResult& res;
1390
    IntegerColumn& indexes;
1391
    size_t n = 0;
1392
    size_t size()
1393
    {
144✔
1394
        return res.end_ndx - res.start_ndx;
144✔
1395
    }
144✔
1396
    auto begin()
1397
    {
228✔
1398
        return indexes.cbegin();
228✔
1399
    }
228✔
1400
    auto end()
1401
    {
384✔
1402
        return indexes.cend();
384✔
1403
    }
384✔
1404
};
1405
} // namespace
1406

1407
void StringIndex::find_all_fulltext(std::vector<ObjKey>& result, StringData value) const
1408
{
360✔
1409
    InternalFindResult res;
360✔
1410
    REALM_ASSERT(result.empty());
360✔
1411

180✔
1412
    auto tokenizer = Tokenizer::get_instance();
360✔
1413
    tokenizer->reset({value.data(), value.size()});
360✔
1414
    auto [includes, excludes] = tokenizer->get_search_tokens();
360✔
1415
    if (includes.empty()) {
360✔
1416
        if (excludes.empty()) {
24✔
1417
            throw InvalidArgument("Missing search token");
6✔
1418
        }
6✔
1419
        result = m_target_column.get_all_keys();
18✔
1420
    }
18✔
1421
    else {
336✔
1422
        for (auto& token : includes) {
414✔
1423
            if (token.back() == '*') {
414✔
1424
                std::set<int64_t> keys;
48✔
1425
                m_array->index_string_find_all_prefix(keys, StringData(token.data(), token.size() - 1));
48✔
1426
                intersect(result, keys);
48✔
1427
            }
48✔
1428
            else {
366✔
1429
                switch (find_all_no_copy(StringData{token}, res)) {
366✔
1430
                    case FindRes_not_found:
12✔
1431
                        result.clear();
12✔
1432
                        break;
12✔
1433
                    case FindRes_column: {
228✔
1434
                        IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
228✔
1435
                        FindResWrapper wrapper{res, indexes};
228✔
1436
                        intersect(result, wrapper);
228✔
1437
                        break;
228✔
1438
                    }
×
1439
                    case FindRes_single:
126✔
1440
                        // merge in single res
63✔
1441
                        if (result.empty()) {
126✔
1442
                            result.emplace_back(res.payload);
96✔
1443
                        }
96✔
1444
                        else {
30✔
1445
                            ObjKey key(res.payload);
30✔
1446
                            auto pos = std::lower_bound(result.begin(), result.end(), key);
30✔
1447
                            if (pos != result.end() && key == *pos) {
30✔
1448
                                result.clear();
30✔
1449
                                result.push_back(key);
30✔
1450
                            }
30✔
1451
                            else {
×
1452
                                result.clear();
×
1453
                            }
×
1454
                        }
30✔
1455
                        break;
126✔
1456
                }
414✔
1457
            }
414✔
1458
            if (result.empty())
414✔
1459
                return;
18✔
1460
        }
414✔
1461
    }
336✔
1462

180✔
1463
    for (auto& token : excludes) {
348✔
1464
        if (token.back() == '*') {
96✔
1465
            throw IllegalOperation("Exclude by prefix is not implemented");
×
1466
        }
×
1467
        if (result.empty())
96✔
1468
            return;
×
1469

48✔
1470
        switch (find_all_no_copy(StringData{token}, res)) {
96✔
1471
            case FindRes_not_found:
✔
1472
                // Nothing to exclude
1473
                break;
×
1474
            case FindRes_column: {
60✔
1475
                IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
60✔
1476

30✔
1477
                auto it = result.begin();
60✔
1478
                auto keep = it;
60✔
1479
                size_t m = res.start_ndx;
60✔
1480
                auto idx_val = indexes.get(m);
60✔
1481

30✔
1482
                while (it != result.end()) {
390✔
1483
                    if (it->value < idx_val) {
330✔
1484
                        // Not found in excludes
78✔
1485
                        if (keep < it)
156✔
1486
                            *keep = *it;
156✔
1487
                        ++keep;
156✔
1488
                        ++it;
156✔
1489
                    }
156✔
1490
                    else {
174✔
1491
                        if (it->value == idx_val) {
174✔
1492
                            // found in excludes - don't keep
75✔
1493
                            ++it;
150✔
1494
                        }
150✔
1495
                        ++m;
174✔
1496
                        idx_val = m < res.end_ndx ? indexes.get(m) : std::numeric_limits<int64_t>::max();
153✔
1497
                    }
174✔
1498
                }
330✔
1499
                if (keep != result.end()) {
60✔
1500
                    result.erase(keep, result.end());
60✔
1501
                }
60✔
1502
                break;
60✔
1503
            }
×
1504
            case FindRes_single: {
36✔
1505
                // exclude single res
18✔
1506
                ObjKey key(res.payload);
36✔
1507
                auto pos = std::lower_bound(result.begin(), result.end(), key);
36✔
1508
                if (pos != result.end() && key == *pos) {
36✔
1509
                    result.erase(pos);
36✔
1510
                }
36✔
1511
                break;
36✔
1512
            }
×
1513
        }
96✔
1514
    }
96✔
1515
}
336✔
1516

1517

1518
void StringIndex::clear()
1519
{
3,024✔
1520
    Array values(m_array->get_alloc());
3,024✔
1521
    get_child(*m_array, 0, values);
3,024✔
1522
    REALM_ASSERT(m_array->size() == values.size() + 1);
3,024✔
1523

855✔
1524
    values.clear();
3,024✔
1525
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
3,024✔
1526

855✔
1527
    size_t size = 1;
3,024✔
1528
    m_array->truncate_and_destroy_children(size); // Don't touch `values` array
3,024✔
1529

855✔
1530
    m_array->set_type(Array::type_HasRefs);
3,024✔
1531
}
3,024✔
1532

1533

1534
void StringIndex::do_delete(ObjKey obj_key, StringData index_data, size_t offset)
1535
{
1,160,451✔
1536
    Allocator& alloc = m_array->get_alloc();
1,160,451✔
1537
    Array values(alloc);
1,160,451✔
1538
    get_child(*m_array, 0, values);
1,160,451✔
1539
    REALM_ASSERT(m_array->size() == values.size() + 1);
1,160,451✔
1540

579,942✔
1541
    // Create 4 byte index key
579,942✔
1542
    key_type key = create_key(index_data, offset);
1,160,451✔
1543

579,942✔
1544
    const size_t pos = values.lower_bound_int(key);
1,160,451✔
1545
    const size_t pos_refs = pos + 1; // first entry in refs points to offsets
1,160,451✔
1546
    REALM_ASSERT(pos != values.size());
1,160,451✔
1547

579,942✔
1548
    if (m_array->is_inner_bptree_node()) {
1,160,451✔
1549
        ref_type ref = m_array->get_as_ref(pos_refs);
4,734✔
1550
        StringIndex node(ref, m_array.get(), pos_refs, m_target_column, alloc);
4,734✔
1551
        node.do_delete(obj_key, index_data, offset);
4,734✔
1552

2,367✔
1553
        // Update the ref
2,367✔
1554
        if (node.is_empty()) {
4,734✔
1555
            values.erase(pos);
×
1556
            m_array->erase(pos_refs);
×
1557
            node.destroy();
×
1558
        }
×
1559
        else {
4,734✔
1560
            key_type max_val = node.get_last_key();
4,734✔
1561
            if (max_val != key_type(values.get(pos)))
4,734✔
1562
                values.set(pos, max_val);
×
1563
        }
4,734✔
1564
    }
4,734✔
1565
    else {
1,155,717✔
1566
        uint64_t ref = m_array->get(pos_refs);
1,155,717✔
1567
        if (ref & 1) {
1,155,717✔
1568
            REALM_ASSERT(int64_t(ref >> 1) == obj_key.value);
562,995✔
1569
            values.erase(pos);
562,995✔
1570
            m_array->erase(pos_refs);
562,995✔
1571
        }
562,995✔
1572
        else {
592,722✔
1573
            // A real ref either points to a list or a subindex
294,591✔
1574
            char* header = alloc.translate(ref_type(ref));
592,722✔
1575
            if (Array::get_context_flag_from_header(header)) {
592,722✔
1576
                StringIndex subindex(ref_type(ref), m_array.get(), pos_refs, m_target_column, alloc);
239,196✔
1577
                subindex.do_delete(obj_key, index_data, offset + s_index_key_length);
239,196✔
1578

119,052✔
1579
                if (subindex.is_empty()) {
239,196✔
1580
                    values.erase(pos);
12,687✔
1581
                    m_array->erase(pos_refs);
12,687✔
1582
                    subindex.destroy();
12,687✔
1583
                }
12,687✔
1584
            }
239,196✔
1585
            else {
353,526✔
1586
                IntegerColumn sub(alloc, ref_type(ref)); // Throws
353,526✔
1587
                sub.set_parent(m_array.get(), pos_refs);
353,526✔
1588
                size_t r = sub.find_first(obj_key.value);
353,526✔
1589
                size_t sub_size = sub.size(); // Slow
353,526✔
1590
                REALM_ASSERT_EX(r != sub_size, r, sub_size);
353,526✔
1591
                sub.erase(r);
353,526✔
1592

175,539✔
1593
                if (sub_size == 1) {
353,526✔
1594
                    values.erase(pos);
3,210✔
1595
                    m_array->erase(pos_refs);
3,210✔
1596
                    sub.destroy();
3,210✔
1597
                }
3,210✔
1598
            }
353,526✔
1599
        }
592,722✔
1600
    }
1,155,717✔
1601
}
1,160,451✔
1602

1603
void StringIndex::erase_string(ObjKey key, StringData value)
1604
{
916,566✔
1605
    do_delete(key, value, 0);
916,566✔
1606

458,550✔
1607
    // Collapse top nodes with single item
458,550✔
1608
    while (m_array->is_inner_bptree_node()) {
916,566✔
1609
        REALM_ASSERT(m_array->size() > 1); // node cannot be empty
4,734✔
1610
        if (m_array->size() > 2)
4,734✔
1611
            break;
4,734✔
1612

1613
        ref_type ref = m_array->get_as_ref(1);
×
1614
        m_array->set(1, 1); // avoid destruction of the extracted ref
×
1615
        m_array->destroy_deep();
×
1616
        m_array->init_from_ref(ref);
×
1617
        m_array->update_parent();
×
1618
    }
×
1619
}
916,566✔
1620

1621
namespace {
1622

1623
bool has_duplicate_values(const Array& node, const ClusterColumn& target_col) noexcept
1624
{
6,366✔
1625
    Allocator& alloc = node.get_alloc();
6,366✔
1626
    Array child(alloc);
6,366✔
1627
    size_t n = node.size();
6,366✔
1628
    REALM_ASSERT(n >= 1);
6,366✔
1629
    if (node.is_inner_bptree_node()) {
6,366✔
1630
        // Inner node
1631
        for (size_t i = 1; i < n; ++i) {
×
1632
            ref_type ref = node.get_as_ref(i);
×
1633
            child.init_from_ref(ref);
×
1634
            if (has_duplicate_values(child, target_col))
×
1635
                return true;
×
1636
        }
×
1637
        return false;
×
1638
    }
6,366✔
1639

3,183✔
1640
    // Leaf node
3,183✔
1641
    for (size_t i = 1; i < n; ++i) {
32,850✔
1642
        int_fast64_t value = node.get(i);
28,740✔
1643
        bool is_single_row_index = (value & 1) != 0;
28,740✔
1644
        if (is_single_row_index)
28,740✔
1645
            continue;
23,280✔
1646

2,730✔
1647
        ref_type ref = to_ref(value);
5,460✔
1648
        child.init_from_ref(ref);
5,460✔
1649

2,730✔
1650
        bool is_subindex = child.get_context_flag();
5,460✔
1651
        if (is_subindex) {
5,460✔
1652
            if (has_duplicate_values(child, target_col))
4,872✔
1653
                return true;
1,800✔
1654
            continue;
3,072✔
1655
        }
3,072✔
1656

294✔
1657
        // Child is root of B+-tree of row indexes
294✔
1658
        IntegerColumn sub(alloc, ref);
588✔
1659
        if (sub.size() > 1) {
588✔
1660
            ObjKey first_key = ObjKey(sub.get(0));
480✔
1661
            ObjKey last_key = ObjKey(sub.back());
480✔
1662
            Mixed first = target_col.get_value(first_key);
480✔
1663
            Mixed last = target_col.get_value(last_key);
480✔
1664
            // Since the list is kept in sorted order, the first and
240✔
1665
            // last values will be the same only if the whole list is
240✔
1666
            // storing duplicate values.
240✔
1667
            if (first == last) {
480✔
1668
                return true;
432✔
1669
            }
432✔
1670
            // There may also be several short lists combined, so we need to
24✔
1671
            // check each of these individually for duplicates.
24✔
1672
            IntegerColumn::const_iterator it = sub.cbegin();
48✔
1673
            IntegerColumn::const_iterator it_end = sub.cend();
48✔
1674
            SortedListComparator slc(target_col);
48✔
1675
            while (it != it_end) {
144✔
1676
                Mixed it_data = target_col.get_value(ObjKey(*it));
120✔
1677
                IntegerColumn::const_iterator next = std::upper_bound(it, it_end, it_data, slc);
120✔
1678
                size_t count_of_value = next - it; // row index subtraction in `sub`
120✔
1679
                if (count_of_value > 1) {
120✔
1680
                    return true;
24✔
1681
                }
24✔
1682
                it = next;
96✔
1683
            }
96✔
1684
        }
48✔
1685
    }
588✔
1686

3,183✔
1687
    return false;
5,238✔
1688
}
6,366✔
1689

1690
} // anonymous namespace
1691

1692

1693
bool StringIndex::has_duplicate_values() const noexcept
1694
{
1,494✔
1695
    return ::has_duplicate_values(*m_array, m_target_column);
1,494✔
1696
}
1,494✔
1697

1698

1699
bool StringIndex::is_empty() const
1700
{
244,086✔
1701
    return m_array->size() == 1; // first entry in refs points to offsets
244,086✔
1702
}
244,086✔
1703

1704
template <>
1705
void StringIndex::insert<StringData>(ObjKey key, StringData value)
1706
{
773,319✔
1707
    StringConversionBuffer buffer;
773,319✔
1708

387,003✔
1709
    if (this->m_target_column.tokenize()) {
773,319✔
1710
        auto words = Tokenizer::get_instance()->reset(std::string_view(value)).get_all_tokens();
198✔
1711

99✔
1712
        for (auto& word : words) {
936✔
1713
            Mixed m(word);
936✔
1714
            insert_with_offset(key, m.get_index_data(buffer), m, 0); // Throws
936✔
1715
        }
936✔
1716
    }
198✔
1717
    else {
773,121✔
1718
        Mixed m(value);
773,121✔
1719
        insert_with_offset(key, m.get_index_data(buffer), m, 0); // Throws
773,121✔
1720
    }
773,121✔
1721
}
773,319✔
1722

1723
template <>
1724
void StringIndex::set<StringData>(ObjKey key, StringData new_value)
1725
{
439,389✔
1726
    StringConversionBuffer buffer;
439,389✔
1727
    Mixed old_value = get(key);
439,389✔
1728
    Mixed new_value2 = Mixed(new_value);
439,389✔
1729

219,363✔
1730
    if (this->m_target_column.tokenize()) {
439,389✔
1731
        auto tokenizer = Tokenizer::get_instance();
168✔
1732
        StringData old_string = old_value.get_index_data(buffer);
168✔
1733
        std::set<std::string> old_words;
168✔
1734

84✔
1735
        if (old_string.size() > 0) {
168✔
1736
            tokenizer->reset({old_string.data(), old_string.size()});
6✔
1737
            old_words = tokenizer->get_all_tokens();
6✔
1738
        }
6✔
1739

84✔
1740
        tokenizer->reset({new_value.data(), new_value.size()});
168✔
1741
        auto new_words = tokenizer->get_all_tokens();
168✔
1742

84✔
1743
        auto w1 = old_words.begin();
168✔
1744
        auto w2 = new_words.begin();
168✔
1745

84✔
1746
        // Do a diff, deleting words no longer present and
84✔
1747
        // inserting new words
84✔
1748
        while (w1 != old_words.end() && w2 != new_words.end()) {
540✔
1749
            if (*w1 < *w2) {
372✔
1750
                erase_string(key, *w1);
126✔
1751
                ++w1;
126✔
1752
            }
126✔
1753
            else if (*w2 < *w1) {
246✔
1754
                Mixed m(*w2);
198✔
1755
                insert_with_offset(key, m.get_index_data(buffer), m, 0);
198✔
1756
                ++w2;
198✔
1757
            }
198✔
1758
            else {
48✔
1759
                ++w1;
48✔
1760
                ++w2;
48✔
1761
            }
48✔
1762
        }
372✔
1763
        while (w1 != old_words.end()) {
168✔
1764
            erase_string(key, *w1);
×
1765
            ++w1;
×
1766
        }
×
1767
        while (w2 != new_words.end()) {
3,354✔
1768
            Mixed m(*w2);
3,186✔
1769
            insert_with_offset(key, m.get_index_data(buffer), m, 0);
3,186✔
1770

1,593✔
1771
            ++w2;
3,186✔
1772
        }
3,186✔
1773
    }
168✔
1774
    else {
439,221✔
1775
        if (REALM_LIKELY(new_value2 != old_value)) {
439,221✔
1776
            // We must erase this row first because erase uses find_first which
216,027✔
1777
            // might find the duplicate if we insert before erasing.
216,027✔
1778
            erase(key); // Throws
432,756✔
1779

216,027✔
1780
            auto index_data = new_value2.get_index_data(buffer);
432,756✔
1781
            insert_with_offset(key, index_data, new_value2, 0); // Throws
432,756✔
1782
        }
432,756✔
1783
    }
439,221✔
1784
}
439,389✔
1785

1786
void StringIndex::node_add_key(ref_type ref)
1787
{
168✔
1788
    REALM_ASSERT(ref);
168✔
1789
    REALM_ASSERT(m_array->is_inner_bptree_node());
168✔
1790

78✔
1791
    Allocator& alloc = m_array->get_alloc();
168✔
1792
    Array offsets(alloc);
168✔
1793
    get_child(*m_array, 0, offsets);
168✔
1794
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
168✔
1795
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE + 1);
168✔
1796

78✔
1797
    Array new_top(alloc);
168✔
1798
    Array new_offsets(alloc);
168✔
1799
    new_top.init_from_ref(ref);
168✔
1800
    new_offsets.init_from_ref(new_top.get_as_ref(0));
168✔
1801
    REALM_ASSERT(!new_offsets.is_empty());
168✔
1802

78✔
1803
    int64_t key = new_offsets.back();
168✔
1804
    offsets.add(key);
168✔
1805
    m_array->add(ref);
168✔
1806
}
168✔
1807

1808
// Must return true if value of object(key) is less than needle.
1809
bool SortedListComparator::operator()(int64_t key_value, Mixed needle) // used in lower_bound
1810
{
20,421,255✔
1811
    Mixed a = m_column.get_value(ObjKey(key_value));
20,421,255✔
1812
    if (a.is_null() && !needle.is_null())
20,421,255✔
1813
        return true;
699✔
1814
    else if (needle.is_null() && !a.is_null())
20,420,556✔
1815
        return false;
6✔
1816
    else if (a.is_null() && needle.is_null())
20,420,550✔
1817
        return false;
359,826✔
1818

10,018,134✔
1819
    if (a == needle)
20,060,724✔
1820
        return false;
20,045,550✔
1821

13,626✔
1822
    // The Mixed::operator< uses a lexicograpical comparison, therefore we
13,626✔
1823
    // cannot use our utf8_compare here because we need to be consistent with
13,626✔
1824
    // using the same compare method as how these strings were they were put
13,626✔
1825
    // into this ordered column in the first place.
13,626✔
1826
    return a.compare_signed(needle) < 0;
15,174✔
1827
}
15,174✔
1828

1829

1830
// Must return true if value of needle is less than value of object(key).
1831
bool SortedListComparator::operator()(Mixed needle, int64_t key_value) // used in upper_bound
1832
{
18,310,056✔
1833
    Mixed a = m_column.get_value(ObjKey(key_value));
18,310,056✔
1834
    if (needle == a) {
18,316,767✔
1835
        return false;
18,314,106✔
1836
    }
18,314,106✔
1837
    return !(*this)(key_value, needle);
2,147,486,308✔
1838
}
2,147,486,308✔
1839

1840
// LCOV_EXCL_START ignore debug functions
1841

1842

1843
void StringIndex::verify() const
1844
{
1,320✔
1845
#ifdef REALM_DEBUG
1,320✔
1846
    m_array->verify();
1,320✔
1847

660✔
1848
    Allocator& alloc = m_array->get_alloc();
1,320✔
1849
    const size_t array_size = m_array->size();
1,320✔
1850

660✔
1851
    // Get first matching row for every key
660✔
1852
    if (m_array->is_inner_bptree_node()) {
1,320✔
1853
        for (size_t i = 1; i < array_size; ++i) {
×
1854
            size_t ref = m_array->get_as_ref(i);
×
1855
            StringIndex ndx(ref, nullptr, 0, m_target_column, alloc);
×
1856
            ndx.verify();
×
1857
        }
×
1858
    }
×
1859
    else {
1,320✔
1860
        size_t column_size = m_target_column.size();
1,320✔
1861
        for (size_t i = 1; i < array_size; ++i) {
2,736✔
1862
            int64_t ref = m_array->get(i);
1,416✔
1863

708✔
1864
            // low bit set indicate literal ref (shifted)
708✔
1865
            if (ref & 1) {
1,416✔
1866
                size_t r = to_size_t((uint64_t(ref) >> 1));
96✔
1867
                REALM_ASSERT_EX(r < column_size, r, column_size);
96✔
1868
            }
96✔
1869
            else {
1,320✔
1870
                // A real ref either points to a list or a subindex
660✔
1871
                char* header = alloc.translate(to_ref(ref));
1,320✔
1872
                if (Array::get_context_flag_from_header(header)) {
1,320✔
1873
                    StringIndex ndx(to_ref(ref), m_array.get(), i, m_target_column, alloc);
1,296✔
1874
                    ndx.verify();
1,296✔
1875
                }
1,296✔
1876
                else {
24✔
1877
                    IntegerColumn sub(alloc, to_ref(ref)); // Throws
24✔
1878
                    IntegerColumn::const_iterator it = sub.cbegin();
24✔
1879
                    IntegerColumn::const_iterator it_end = sub.cend();
24✔
1880
                    SortedListComparator slc(m_target_column);
24✔
1881
                    Mixed previous = get(ObjKey(*it));
24✔
1882
                    size_t last_row = to_size_t(*it);
24✔
1883

12✔
1884
                    // Check that strings listed in sub are in sorted order
12✔
1885
                    // and if there are duplicates, that the row numbers are
12✔
1886
                    // sorted in the group of duplicates.
12✔
1887
                    while (it != it_end) {
96✔
1888
                        Mixed it_data = get(ObjKey(*it));
72✔
1889
                        size_t it_row = to_size_t(*it);
72✔
1890
                        REALM_ASSERT(previous.compare_signed(it_data) <= 0);
72✔
1891
                        if (it != sub.cbegin() && previous == it_data) {
72✔
1892
                            REALM_ASSERT_EX(it_row > last_row, it_row, last_row);
×
1893
                        }
×
1894
                        last_row = it_row;
72✔
1895
                        previous = get(ObjKey(*it));
72✔
1896
                        ++it;
72✔
1897
                    }
72✔
1898
                }
24✔
1899
            }
1,320✔
1900
        }
1,416✔
1901
    }
1,320✔
1902
// FIXME: Extend verification along the lines of IntegerColumn::verify().
660✔
1903
#endif
1,320✔
1904
}
1,320✔
1905

1906
#ifdef REALM_DEBUG
1907

1908
template <class T>
1909
void StringIndex::verify_entries(const ClusterColumn& column) const
1910
{
1911
    std::vector<ObjKey> results;
1912

1913
    auto it = column.begin();
1914
    auto end = column.end();
1915
    auto col = column.get_column_key();
1916
    while (it != end) {
1917
        ObjKey key = it->get_key();
1918
        T value = it->get<T>(col);
1919

1920
        find_all(results, value);
1921

1922
        auto ndx = find(results.begin(), results.end(), key);
1923
        REALM_ASSERT(ndx != results.end());
1924
        size_t found = count(value);
1925
        REALM_ASSERT_EX(found >= 1, found);
1926
        results.clear();
1927
    }
1928
}
1929

1930
namespace {
1931

1932
void out_hex(std::ostream& out, uint64_t val)
1933
{
×
1934
    if (val) {
×
1935
        out_hex(out, val >> 8);
×
1936
        if (char c = val & 0xFF) {
×
1937
            out << c;
×
1938
        }
×
1939
    }
×
1940
}
×
1941

1942
} // namespace
1943

1944
void StringIndex::dump_node_structure(const Array& node, std::ostream& out, int level)
1945
{
×
1946
    int indent = level * 2;
×
1947
    Allocator& alloc = node.get_alloc();
×
1948
    Array subnode(alloc);
×
1949

1950
    size_t node_size = node.size();
×
1951
    REALM_ASSERT(node_size >= 1);
×
1952

1953
    out << std::hex;
×
1954

1955
    bool node_is_leaf = !node.is_inner_bptree_node();
×
1956
    if (node_is_leaf) {
×
1957
        out << std::setw(indent) << ""
×
1958
            << "Leaf (B+ tree) (ref: " << node.get_ref() << ")\n";
×
1959
    }
×
1960
    else {
×
1961
        out << std::setw(indent) << ""
×
1962
            << "Inner node (B+ tree) (ref: " << node.get_ref() << ")\n";
×
1963
    }
×
1964

1965
    subnode.init_from_ref(to_ref(node.front()));
×
1966
    out << std::setw(indent) << ""
×
1967
        << "  Keys (keys_ref: " << subnode.get_ref() << ", ";
×
1968
    if (subnode.is_empty()) {
×
1969
        out << "no keys";
×
1970
    }
×
1971
    else {
×
1972
        out << "keys: ";
×
1973
        for (size_t i = 0; i != subnode.size(); ++i) {
×
1974
            if (i != 0)
×
1975
                out << ", ";
×
1976
            out_hex(out, subnode.get(i));
×
1977
        }
×
1978
    }
×
1979
    out << ")\n";
×
1980

1981
    if (node_is_leaf) {
×
1982
        for (size_t i = 1; i != node_size; ++i) {
×
1983
            int_fast64_t value = node.get(i);
×
1984
            bool is_single_row_index = (value & 1) != 0;
×
1985
            if (is_single_row_index) {
×
1986
                out << std::setw(indent) << ""
×
1987
                    << "  Single row index (value: " << (value / 2) << ")\n";
×
1988
                continue;
×
1989
            }
×
1990
            subnode.init_from_ref(to_ref(value));
×
1991
            bool is_subindex = subnode.get_context_flag();
×
1992
            if (is_subindex) {
×
1993
                out << std::setw(indent) << ""
×
1994
                    << "  Subindex\n";
×
1995
                dump_node_structure(subnode, out, level + 2);
×
1996
                continue;
×
1997
            }
×
1998
            IntegerColumn indexes(alloc, to_ref(value));
×
1999
            out << std::setw(indent) << ""
×
2000
                << "  List of row indexes\n";
×
2001
            indexes.dump_values(out, level + 2);
×
2002
        }
×
2003
        return;
×
2004
    }
×
2005

2006

2007
    size_t num_children = node_size - 1;
×
2008
    size_t child_ref_begin = 1;
×
2009
    size_t child_ref_end = 1 + num_children;
×
2010
    for (size_t i = child_ref_begin; i != child_ref_end; ++i) {
×
2011
        subnode.init_from_ref(node.get_as_ref(i));
×
2012
        dump_node_structure(subnode, out, level + 1);
×
2013
    }
×
2014
}
×
2015

2016
void StringIndex::dump_node_structure() const
NEW
2017
{
×
NEW
2018
    do_dump_node_structure(std::cout, 0);
×
NEW
2019
}
×
2020

2021
void StringIndex::do_dump_node_structure(std::ostream& out, int level) const
2022
{
×
2023
    dump_node_structure(*m_array, out, level);
×
2024
}
×
2025

2026
#endif // LCOV_EXCL_STOP ignore debug functions
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc