• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / james.stone_385

26 Sep 2023 08:24PM UTC coverage: 90.917% (+0.03%) from 90.892%
james.stone_385

Pull #6670

Evergreen

ironage
fix lint
Pull Request #6670: Sorting stage 3

97066 of 177964 branches covered (0.0%)

901 of 927 new or added lines in 13 files covered. (97.2%)

122 existing lines in 17 files now uncovered.

236115 of 259704 relevant lines covered (90.92%)

6670991.87 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.25
/src/realm/index_string.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include <cstdio>
20
#include <iomanip>
21
#include <list>
22

23
#ifdef REALM_DEBUG
24
#include <iostream>
25
#endif
26

27
#include <realm/exceptions.hpp>
28
#include <realm/index_string.hpp>
29
#include <realm/table.hpp>
30
#include <realm/timestamp.hpp>
31
#include <realm/column_integer.hpp>
32
#include <realm/unicode.hpp>
33
#include <realm/tokenizer.hpp>
34

35
using namespace realm;
36
using namespace realm::util;
37

38
namespace {
39

40
void get_child(Array& parent, size_t child_ref_ndx, Array& child) noexcept
41
{
19,858,317✔
42
    ref_type child_ref = parent.get_as_ref(child_ref_ndx);
19,858,317✔
43
    child.init_from_ref(child_ref);
19,858,317✔
44
    child.set_parent(&parent, child_ref_ndx);
19,858,317✔
45
}
19,858,317✔
46

47
// This method reconstructs the string inserted in the search index based on a string
48
// that matches so far and the last key (only works if complete strings are stored in the index)
49
static StringData reconstruct_string(size_t offset, StringIndex::key_type key, StringData new_string)
50
{
1,086✔
51
    if (key == 0)
1,086✔
52
        return StringData();
×
53

543✔
54
    size_t rest_len = 4;
1,086✔
55
    char* k = reinterpret_cast<char*>(&key);
1,086✔
56
    if (k[0] == 'X')
1,086✔
57
        rest_len = 3;
234✔
58
    else if (k[1] == 'X')
852✔
59
        rest_len = 2;
360✔
60
    else if (k[2] == 'X')
492✔
61
        rest_len = 1;
228✔
62
    else if (k[3] == 'X')
264✔
63
        rest_len = 0;
36✔
64

543✔
65
    REALM_ASSERT(offset + rest_len <= new_string.size());
1,086✔
66

543✔
67
    return StringData(new_string.data(), offset + rest_len);
1,086✔
68
}
1,086✔
69

70

71
} // anonymous namespace
72

73
DataType ClusterColumn::get_data_type() const
74
{
×
75
    const Table* table = m_cluster_tree->get_owning_table();
×
76
    return table->get_column_type(m_column_key);
×
77
}
×
78

79
Mixed ClusterColumn::get_value(ObjKey key) const
80
{
43,393,164✔
81
    const Obj obj{m_cluster_tree->get(key)};
43,393,164✔
82
    return obj.get_any(m_column_key);
43,393,164✔
83
}
43,393,164✔
84

85
std::vector<ObjKey> ClusterColumn::get_all_keys() const
86
{
18✔
87
    std::vector<ObjKey> ret;
18✔
88
    ret.reserve(m_cluster_tree->size());
18✔
89
    m_cluster_tree->traverse([&ret](const Cluster* cluster) {
18✔
90
        auto sz = cluster->node_size();
18✔
91
        for (size_t i = 0; i < sz; i++) {
198✔
92
            ret.push_back(cluster->get_real_key(i));
180✔
93
        }
180✔
94

9✔
95
        return IteratorControl::AdvanceToNext;
18✔
96
    });
18✔
97
    return ret;
18✔
98
}
18✔
99

100
template <>
101
int64_t IndexArray::from_list<index_FindFirst>(const Mixed& value, InternalFindResult& /* result_ref */,
102
                                               const IntegerColumn& key_values, const ClusterColumn& column) const
103
{
5,442✔
104
    SortedListComparator slc(column);
5,442✔
105

2,685✔
106
    IntegerColumn::const_iterator it_end = key_values.cend();
5,442✔
107
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
5,442✔
108
    if (lower == it_end)
5,442✔
109
        return null_key.value;
×
110

2,685✔
111
    int64_t first_key_value = *lower;
5,442✔
112

2,685✔
113
    Mixed actual = column.get_value(ObjKey(first_key_value));
5,442✔
114
    if (actual != value)
5,442✔
115
        return null_key.value;
×
116

2,685✔
117
    return first_key_value;
5,442✔
118
}
5,442✔
119

120
template <>
121
int64_t IndexArray::from_list<index_Count>(const Mixed& value, InternalFindResult& /* result_ref */,
122
                                           const IntegerColumn& key_values, const ClusterColumn& column) const
123
{
8,142✔
124
    SortedListComparator slc(column);
8,142✔
125

4,044✔
126
    IntegerColumn::const_iterator it_end = key_values.cend();
8,142✔
127
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
8,142✔
128
    if (lower == it_end)
8,142✔
129
        return 0;
24✔
130

4,032✔
131
    int64_t first_key_value = *lower;
8,118✔
132

4,032✔
133
    Mixed actual = column.get_value(ObjKey(first_key_value));
8,118✔
134
    if (actual != value)
8,118✔
135
        return 0;
24✔
136

4,020✔
137
    IntegerColumn::const_iterator upper = std::upper_bound(lower, it_end, value, slc);
8,094✔
138
    int64_t cnt = upper - lower;
8,094✔
139

4,020✔
140
    return cnt;
8,094✔
141
}
8,094✔
142

143
template <>
144
int64_t IndexArray::from_list<index_FindAll_nocopy>(const Mixed& value, InternalFindResult& result_ref,
145
                                                    const IntegerColumn& key_values,
146
                                                    const ClusterColumn& column) const
147
{
2,169✔
148
    SortedListComparator slc(column);
2,169✔
149
    IntegerColumn::const_iterator it_end = key_values.cend();
2,169✔
150
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
2,169✔
151
    if (lower == it_end)
2,169✔
152
        return size_t(FindRes_not_found);
54✔
153

1,029✔
154
    ObjKey first_key = ObjKey(*lower);
2,115✔
155

1,029✔
156
    Mixed actual = column.get_value(ObjKey(first_key));
2,115✔
157
    if (actual != value)
2,115✔
158
        return size_t(FindRes_not_found);
69✔
159

996✔
160
    // Optimization: check the last entry before trying upper bound.
996✔
161
    IntegerColumn::const_iterator upper = it_end;
2,046✔
162
    --upper;
2,046✔
163
    // Single result if upper matches lower
996✔
164
    if (upper == lower) {
2,046✔
165
        result_ref.payload = *lower;
351✔
166
        return size_t(FindRes_single);
351✔
167
    }
351✔
168

825✔
169
    // Check string value at upper, if equal return matches in (lower, upper]
825✔
170
    ObjKey last_key = ObjKey(*upper);
1,695✔
171
    actual = column.get_value(ObjKey(last_key));
1,695✔
172
    if (actual == value) {
1,695✔
173
        result_ref.payload = from_ref(key_values.get_ref());
1,161✔
174
        result_ref.start_ndx = lower.get_position();
1,161✔
175
        result_ref.end_ndx = upper.get_position() + 1; // one past last match
1,161✔
176
        return size_t(FindRes_column);
1,161✔
177
    }
1,161✔
178

264✔
179
    // Last result is not equal, find the upper bound of the range of results.
264✔
180
    // Note that we are passing upper which is cend() - 1 here as we already
264✔
181
    // checked the last item manually.
264✔
182
    upper = std::upper_bound(lower, upper, value, slc);
534✔
183

264✔
184
    result_ref.payload = from_ref(key_values.get_ref());
534✔
185
    result_ref.start_ndx = lower.get_position();
534✔
186
    result_ref.end_ndx = upper.get_position();
534✔
187
    return size_t(FindRes_column);
534✔
188
}
534✔
189

190

191
template <IndexMethod method>
192
int64_t IndexArray::index_string(const Mixed& value, InternalFindResult& result_ref,
193
                                 const ClusterColumn& column) const
194
{
2,268,126✔
195
    // Return`realm::not_found`, or an index to the (any) match
1,121,202✔
196
    constexpr bool first(method == index_FindFirst);
2,268,126✔
197
    // Return 0, or the number of items that match the specified `value`
1,121,202✔
198
    constexpr bool get_count(method == index_Count);
2,268,126✔
199
    // Same as `index_FindAll` but does not copy matching rows into `column`
1,121,202✔
200
    // returns FindRes_not_found if there are no matches
1,121,202✔
201
    // returns FindRes_single and the row index (literal) in result_ref.payload
1,121,202✔
202
    // or returns FindRes_column and the reference to a column of duplicates in
1,121,202✔
203
    // result_ref.result with the results in the bounds start_ndx, and end_ndx
1,121,202✔
204
    constexpr bool allnocopy(method == index_FindAll_nocopy);
2,268,126✔
205

1,121,202✔
206
    constexpr int64_t local_not_found = allnocopy ? int64_t(FindRes_not_found) : first ? null_key.value : 0;
2,268,126✔
207

1,121,202✔
208
    const char* data = m_data;
2,268,126✔
209
    const char* header;
2,268,126✔
210
    uint_least8_t width = m_width;
2,268,126✔
211
    bool is_inner_node = m_is_inner_bptree_node;
2,268,126✔
212
    typedef StringIndex::key_type key_type;
2,268,126✔
213
    size_t stringoffset = 0;
2,268,126✔
214

1,121,202✔
215
    StringConversionBuffer buffer;
2,268,126✔
216
    StringData index_data = value.get_index_data(buffer);
2,268,126✔
217

1,121,202✔
218
    // Create 4 byte index key
1,121,202✔
219
    key_type key = StringIndex::create_key(index_data, stringoffset);
2,268,126✔
220

1,121,202✔
221
    for (;;) {
2,768,973✔
222
        // Get subnode table
1,364,253✔
223
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
2,768,973✔
224

1,364,253✔
225
        // Find the position matching the key
1,364,253✔
226
        const char* offsets_header = m_alloc.translate(offsets_ref);
2,768,973✔
227
        const char* offsets_data = get_data_from_header(offsets_header);
2,768,973✔
228
        size_t offsets_size = get_size_from_header(offsets_header);
2,768,973✔
229
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
2,768,973✔
230

1,364,253✔
231
        // If key is outside range, we know there can be no match
1,364,253✔
232
        if (pos == offsets_size)
2,768,973✔
233
            return local_not_found;
902,751✔
234

915,423✔
235
        // Get entry under key
915,423✔
236
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
1,866,222✔
237
        uint64_t ref = get_direct(data, width, pos_refs);
1,866,222✔
238

915,423✔
239
        if (is_inner_node) {
1,866,222✔
240
            // Set vars for next iteration
26,655✔
241
            header = m_alloc.translate(to_ref(ref));
62,259✔
242
            data = get_data_from_header(header);
62,259✔
243
            width = get_width_from_header(header);
62,259✔
244
            is_inner_node = get_is_inner_bptree_node_from_header(header);
62,259✔
245
            continue;
62,259✔
246
        }
62,259✔
247

888,768✔
248
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
1,803,963✔
249

888,768✔
250
        if (stored_key != key)
1,803,963✔
251
            return local_not_found;
279,924✔
252

759,327✔
253
        // Literal row index (tagged)
759,327✔
254
        if (ref & 1) {
1,524,039✔
255
            int64_t key_value = int64_t(ref >> 1);
1,069,461✔
256

535,047✔
257
            Mixed a = column.is_fulltext() ? reconstruct_string(stringoffset, key, index_data)
535,131✔
258
                                           : column.get_value(ObjKey(key_value));
1,069,377✔
259
            if (a == value) {
1,069,461✔
260
                result_ref.payload = key_value;
1,066,938✔
261
                return first ? key_value : get_count ? 1 : FindRes_single;
1,057,689✔
262
            }
1,066,938✔
263
            return local_not_found;
2,523✔
264
        }
2,523✔
265

224,280✔
266
        const char* sub_header = m_alloc.translate(ref_type(ref));
454,578✔
267
        const bool sub_isindex = get_context_flag_from_header(sub_header);
454,578✔
268

224,280✔
269
        // List of row indices with common prefix up to this point, in sorted order.
224,280✔
270
        if (!sub_isindex) {
454,578✔
271
            const IntegerColumn sub(m_alloc, ref_type(ref));
16,041✔
272
            if (column.is_fulltext()) {
16,041✔
273
                result_ref.payload = ref;
288✔
274
                result_ref.start_ndx = 0;
288✔
275
                result_ref.end_ndx = sub.size();
288✔
276
                return FindRes_column;
288✔
277
            }
288✔
278

7,785✔
279
            return from_list<method>(value, result_ref, sub, column);
15,753✔
280
        }
15,753✔
281

216,351✔
282
        // Recurse into sub-index;
216,351✔
283
        header = sub_header;
438,537✔
284
        data = get_data_from_header(header);
438,537✔
285
        width = get_width_from_header(header);
438,537✔
286
        is_inner_node = get_is_inner_bptree_node_from_header(header);
438,537✔
287

216,351✔
288
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
216,351✔
289
        stringoffset += 4;
438,537✔
290

216,351✔
291
        // Update 4 byte index key
216,351✔
292
        key = StringIndex::create_key(index_data, stringoffset);
438,537✔
293
    }
438,537✔
294
}
2,268,126✔
295

296

297
void IndexArray::from_list_all_ins(StringData upper_value, std::vector<ObjKey>& result, const IntegerColumn& rows,
298
                                   const ClusterColumn& column) const
299
{
1,422✔
300
    // optimization for the most common case, where all the strings under a given subindex are equal
711✔
301
    Mixed first = column.get_value(ObjKey(*rows.cbegin()));
1,422✔
302
    Mixed last = column.get_value(ObjKey(*(rows.cend() - 1)));
1,422✔
303
    if (first == last) {
1,422✔
304
        if (!first.is_type(type_String))
102✔
305
            return;
×
306
        auto first_str_upper = case_map(first.get_string(), true);
102✔
307
        if (first_str_upper != upper_value) {
102✔
308
            return;
24✔
309
        }
24✔
310

39✔
311
        size_t sz = result.size() + rows.size();
78✔
312
        result.reserve(sz);
78✔
313
        for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
246✔
314
            result.push_back(ObjKey(*it));
168✔
315
        }
168✔
316
        return;
78✔
317
    }
78✔
318

660✔
319
    // special case for very long strings, where they might have a common prefix and end up in the
660✔
320
    // same subindex column, but still not be identical
660✔
321
    for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
66,792✔
322
        ObjKey key = ObjKey(*it);
65,472✔
323
        Mixed val = column.get_value(key);
65,472✔
324
        if (val.is_type(type_String)) {
65,472✔
325
            auto upper_str = case_map(val.get_string(), true);
65,472✔
326
            if (upper_str == upper_value) {
65,472✔
327
                result.push_back(key);
1,440✔
328
            }
1,440✔
329
        }
65,472✔
330
    }
65,472✔
331

660✔
332
    return;
1,320✔
333
}
1,320✔
334

335

336
void IndexArray::from_list_all(const Mixed& value, std::vector<ObjKey>& result, const IntegerColumn& rows,
337
                               const ClusterColumn& column) const
338
{
43,182✔
339
    SortedListComparator slc(column);
43,182✔
340

22,389✔
341
    IntegerColumn::const_iterator it_end = rows.cend();
43,182✔
342
    IntegerColumn::const_iterator lower = std::lower_bound(rows.cbegin(), it_end, value, slc);
43,182✔
343
    if (lower == it_end)
43,182✔
344
        return;
×
345

22,389✔
346
    ObjKey key = ObjKey(*lower);
43,182✔
347

22,389✔
348
    Mixed a = column.get_value(key);
43,182✔
349
    if (a != value)
43,182✔
350
        return;
×
351

22,389✔
352
    IntegerColumn::const_iterator upper = std::upper_bound(lower, it_end, value, slc);
43,182✔
353

22,389✔
354
    // Copy all matches into result column
22,389✔
355
    size_t sz = result.size() + (upper - lower);
43,182✔
356
    result.reserve(sz);
43,182✔
357
    for (IntegerColumn::const_iterator it = lower; it != upper; ++it) {
809,661✔
358
        result.push_back(ObjKey(*it));
766,479✔
359
    }
766,479✔
360

22,389✔
361
    return;
43,182✔
362
}
43,182✔
363

364

365
namespace {
366

367
// Helper functions for SearchList (index_string_all_ins) for generating permutations of index keys
368

369
// replicates the 4 least significant bits each times 8
370
// eg: abcd -> aaaaaaaabbbbbbbbccccccccdddddddd
371
uint32_t replicate_4_lsb_x8(uint32_t i)
372
{
3,123,744✔
373
    REALM_ASSERT_DEBUG(i <= 15);
3,123,744✔
374
    i *= 0x204081;
3,123,744✔
375
    i &= 0x1010101;
3,123,744✔
376
    i *= 0xff;
3,123,744✔
377
    return i;
3,123,744✔
378
}
3,123,744✔
379

380
int32_t select_from_mask(int32_t a, int32_t b, int32_t mask) {
3,123,744✔
381
    return a ^ ((a ^ b) & mask);
3,123,744✔
382
}
3,123,744✔
383

384
// Given upper and lower keys: "ABCD" and "abcd", the 4 LSBs in the permutation argument determine the
385
// final key:
386
// Permutation 0  = "ABCD"
387
// Permutation 1  = "ABCd"
388
// Permutation 8  = "aBCD"
389
// Permutation 15 = "abcd"
390
using key_type = StringIndex::key_type;
391
static key_type generate_key(key_type upper, key_type lower, int permutation)
392
{
3,123,744✔
393
    return select_from_mask(upper, lower, replicate_4_lsb_x8(permutation));
3,123,744✔
394
}
3,123,744✔
395

396

397
// Helper structure for IndexArray::index_string_all_ins to generate and keep track of search key permutations,
398
// when traversing the trees.
399
struct SearchList {
400
    struct Item {
401
        const char* header;
402
        size_t string_offset;
403
        key_type key;
404
    };
405

406
    SearchList(const util::Optional<std::string>& upper_value, const util::Optional<std::string>& lower_value)
407
        : m_upper_value(upper_value)
408
        , m_lower_value(lower_value)
409
    {
6,582✔
410
        m_keys_seen.reserve(num_permutations);
6,582✔
411
    }
6,582✔
412

413
    // Add all unique keys for this level to the internal work stack
414
    void add_all_for_level(const char* header, size_t string_offset)
415
    {
195,234✔
416
        m_keys_seen.clear();
195,234✔
417
        const key_type upper_key = StringIndex::create_key(m_upper_value, string_offset);
195,234✔
418
        const key_type lower_key = StringIndex::create_key(m_lower_value, string_offset);
195,234✔
419
        for (int p = 0; p < num_permutations; ++p) {
3,318,978✔
420
            // FIXME: This might still be incorrect due to multi-byte unicode characters (crossing the 4 byte key
1,561,872✔
421
            // size) being combined incorrectly.
1,561,872✔
422
            const key_type key = generate_key(upper_key, lower_key, p);
3,123,744✔
423
            const bool new_key = std::find(m_keys_seen.cbegin(), m_keys_seen.cend(), key) == m_keys_seen.cend();
3,123,744✔
424
            if (new_key) {
3,123,744✔
425
                m_keys_seen.push_back(key);
3,059,706✔
426
                add_next(header, string_offset, key);
3,059,706✔
427
            }
3,059,706✔
428
        }
3,123,744✔
429
    }
195,234✔
430

431
    bool empty() const
432
    {
3,066,288✔
433
        return m_items.empty();
3,066,288✔
434
    }
3,066,288✔
435

436
    Item get_next()
437
    {
3,059,706✔
438
        Item item = m_items.back();
3,059,706✔
439
        m_items.pop_back();
3,059,706✔
440
        return item;
3,059,706✔
441
    }
3,059,706✔
442

443
    // Add a single entry to the internal work stack. Used to traverse the inner trees (same key)
444
    void add_next(const char* header, size_t string_offset, key_type key)
445
    {
3,059,706✔
446
        m_items.push_back({header, string_offset, key});
3,059,706✔
447
    }
3,059,706✔
448

449
private:
450
    static constexpr int num_permutations = 1 << sizeof(key_type); // 4 bytes gives up to 16 search keys
451

452
    std::vector<Item> m_items;
453

454
    const util::Optional<std::string> m_upper_value;
455
    const util::Optional<std::string> m_lower_value;
456

457
    std::vector<key_type> m_keys_seen;
458
};
459

460

461
} // namespace
462

463

464
void IndexArray::index_string_all_ins(StringData value, std::vector<ObjKey>& result,
465
                                      const ClusterColumn& column) const
466
{
6,582✔
467
    REALM_ASSERT(!value.is_null());
6,582✔
468

3,291✔
469
    const util::Optional<std::string> upper_value = case_map(value, true);
6,582✔
470
    const util::Optional<std::string> lower_value = case_map(value, false);
6,582✔
471
    SearchList search_list(upper_value, lower_value);
6,582✔
472

3,291✔
473
    const char* top_header = get_header_from_data(m_data);
6,582✔
474
    search_list.add_all_for_level(top_header, 0);
6,582✔
475

3,291✔
476
    while (!search_list.empty()) {
3,066,288✔
477
        SearchList::Item item = search_list.get_next();
3,059,706✔
478

1,529,853✔
479
        const char* const header = item.header;
3,059,706✔
480
        const size_t string_offset = item.string_offset;
3,059,706✔
481
        const key_type key = item.key;
3,059,706✔
482
        const char* const data = get_data_from_header(header);
3,059,706✔
483
        const uint_least8_t width = get_width_from_header(header);
3,059,706✔
484
        const bool is_inner_node = get_is_inner_bptree_node_from_header(header);
3,059,706✔
485

1,529,853✔
486
        // Get subnode table
1,529,853✔
487
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
3,059,706✔
488

1,529,853✔
489
        // Find the position matching the key
1,529,853✔
490
        const char* const offsets_header = m_alloc.translate(offsets_ref);
3,059,706✔
491
        const char* const offsets_data = get_data_from_header(offsets_header);
3,059,706✔
492
        const size_t offsets_size = get_size_from_header(offsets_header);
3,059,706✔
493
        const size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
3,059,706✔
494

1,529,853✔
495
        // If key is outside range, we know there can be no match
1,529,853✔
496
        if (pos == offsets_size)
3,059,706✔
497
            continue;
858✔
498

1,529,424✔
499
        // Get entry under key
1,529,424✔
500
        const size_t pos_refs = pos + 1; // first entry in refs points to offsets
3,058,848✔
501
        const uint64_t ref = get_direct(data, width, pos_refs);
3,058,848✔
502

1,529,424✔
503
        if (is_inner_node) {
3,058,848✔
504
            // Set vars for next iteration
505
            const char* const inner_header = m_alloc.translate(to_ref(ref));
×
506
            search_list.add_next(inner_header, string_offset, key);
×
507
            continue;
×
508
        }
×
509

1,529,424✔
510
        const key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
3,058,848✔
511

1,529,424✔
512
        if (stored_key != key)
3,058,848✔
513
            continue;
2,863,128✔
514

97,860✔
515
        // Literal row index (tagged)
97,860✔
516
        if (ref & 1) {
195,720✔
517
            ObjKey k(int64_t(ref >> 1));
5,646✔
518

2,823✔
519
            // The buffer is needed when for when this is an integer index.
2,823✔
520
            StringConversionBuffer buffer;
5,646✔
521
            const StringData str = column.get_value(k).get_index_data(buffer);
5,646✔
522
            const util::Optional<std::string> upper_str = case_map(str, true);
5,646✔
523
            if (upper_str == upper_value) {
5,646✔
524
                result.push_back(k);
5,598✔
525
            }
5,598✔
526
            continue;
5,646✔
527
        }
5,646✔
528

95,037✔
529
        const char* const sub_header = m_alloc.translate(ref_type(ref));
190,074✔
530
        const bool sub_isindex = get_context_flag_from_header(sub_header);
190,074✔
531

95,037✔
532
        // List of row indices with common prefix up to this point, in sorted order.
95,037✔
533
        if (!sub_isindex) {
190,074✔
534
            const IntegerColumn sub(m_alloc, ref_type(ref));
1,422✔
535
            from_list_all_ins(upper_value, result, sub, column);
1,422✔
536
            continue;
1,422✔
537
        }
1,422✔
538

94,326✔
539
        // Recurse into sub-index;
94,326✔
540
        search_list.add_all_for_level(sub_header, string_offset + 4);
188,652✔
541
    }
188,652✔
542

3,291✔
543
    // sort the result and return a std::vector
3,291✔
544
    std::sort(result.begin(), result.end());
6,582✔
545
}
6,582✔
546

547

548
void IndexArray::index_string_all(const Mixed& value, std::vector<ObjKey>& result, const ClusterColumn& column) const
549
{
51,450✔
550
    const char* data = m_data;
51,450✔
551
    const char* header;
51,450✔
552
    uint_least8_t width = m_width;
51,450✔
553
    bool is_inner_node = m_is_inner_bptree_node;
51,450✔
554
    size_t stringoffset = 0;
51,450✔
555

26,358✔
556
    StringConversionBuffer buffer;
51,450✔
557
    StringData index_data = value.get_index_data(buffer);
51,450✔
558
    // Create 4 byte index key
26,358✔
559
    key_type key = StringIndex::create_key(index_data, stringoffset);
51,450✔
560

26,358✔
561
    for (;;) {
85,530✔
562
        // Get subnode table
42,885✔
563
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
85,530✔
564

42,885✔
565
        // Find the position matching the key
42,885✔
566
        const char* offsets_header = m_alloc.translate(offsets_ref);
85,530✔
567
        const char* offsets_data = get_data_from_header(offsets_header);
85,530✔
568
        size_t offsets_size = get_size_from_header(offsets_header);
85,530✔
569
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
85,530✔
570

42,885✔
571
        // If key is outside range, we know there can be no match
42,885✔
572
        if (pos == offsets_size)
85,530✔
573
            return;
×
574

42,885✔
575
        // Get entry under key
42,885✔
576
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
85,530✔
577
        uint64_t ref = get_direct(data, width, pos_refs);
85,530✔
578

42,885✔
579
        if (is_inner_node) {
85,530✔
580
            // Set vars for next iteration
581
            header = m_alloc.translate(ref_type(ref));
×
582
            data = get_data_from_header(header);
×
583
            width = get_width_from_header(header);
×
584
            is_inner_node = get_is_inner_bptree_node_from_header(header);
×
585
            continue;
×
586
        }
×
587

42,885✔
588
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
85,530✔
589

42,885✔
590
        if (stored_key != key)
85,530✔
591
            return;
6,120✔
592

39,825✔
593
        // Literal row index (tagged)
39,825✔
594
        if (ref & 1) {
79,410✔
595
            ObjKey k(int64_t(ref >> 1));
2,148✔
596

909✔
597
            Mixed a = column.get_value(k);
2,148✔
598
            if (a == value) {
2,148✔
599
                result.push_back(k);
2,148✔
600
                return;
2,148✔
601
            }
2,148✔
602
            return;
×
603
        }
×
604

38,916✔
605
        const char* sub_header = m_alloc.translate(ref_type(ref));
77,262✔
606
        const bool sub_isindex = get_context_flag_from_header(sub_header);
77,262✔
607

38,916✔
608
        // List of row indices with common prefix up to this point, in sorted order.
38,916✔
609
        if (!sub_isindex) {
77,262✔
610
            const IntegerColumn sub(m_alloc, ref_type(ref));
43,182✔
611
            return from_list_all(value, result, sub, column);
43,182✔
612
        }
43,182✔
613

16,527✔
614
        // Recurse into sub-index;
16,527✔
615
        header = sub_header;
34,080✔
616
        data = get_data_from_header(header);
34,080✔
617
        width = get_width_from_header(header);
34,080✔
618
        is_inner_node = get_is_inner_bptree_node_from_header(header);
34,080✔
619

16,527✔
620
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
16,527✔
621
        stringoffset += 4;
34,080✔
622

16,527✔
623
        // Update 4 byte index key
16,527✔
624
        key = StringIndex::create_key(index_data, stringoffset);
34,080✔
625
    }
34,080✔
626
}
51,450✔
627

628
static void get_all_keys_below(std::set<int64_t>& result, ref_type ref, Allocator& alloc)
629
{
72✔
630
    const char* sub_header = alloc.translate(ref_type(ref));
72✔
631
    const bool sub_isindex = NodeHeader::get_context_flag_from_header(sub_header);
72✔
632

36✔
633
    if (sub_isindex) {
72✔
634
        Array tree(alloc);
36✔
635
        tree.init_from_ref(ref);
36✔
636
        auto sz = tree.size();
36✔
637
        for (size_t n = 1; n < sz; n++) {
90✔
638
            auto rot = tree.get_as_ref_or_tagged(n);
54✔
639
            // Literal row index (tagged)
27✔
640
            if (rot.is_tagged()) {
54✔
641
                result.insert(rot.get_as_int());
30✔
642
            }
30✔
643
            else {
24✔
644
                get_all_keys_below(result, rot.get_as_ref(), alloc);
24✔
645
            }
24✔
646
        }
54✔
647
    }
36✔
648
    else {
36✔
649
        IntegerColumn tree(alloc, ref);
36✔
650
        tree.for_all([&result](int64_t i) {
84✔
651
            result.insert(i);
84✔
652
        });
84✔
653
    }
36✔
654
}
72✔
655

656
void IndexArray::_index_string_find_all_prefix(std::set<int64_t>& result, StringData str, const char* header) const
657
{
48✔
658
    size_t stringoffset = 0;
48✔
659

24✔
660
    for (;;) {
72✔
661
        const char* data = NodeHeader::get_data_from_header(header);
72✔
662
        uint_least8_t width = get_width_from_header(header);
72✔
663

36✔
664
        // Create 4 byte lower and upper key
36✔
665
        key_type lower = 0;
72✔
666
        size_t i = 0;
72✔
667
        size_t n = str.size() - stringoffset;
72✔
668
        bool is_at_string_end = (n <= 4);
72✔
669
        if (!is_at_string_end) {
72✔
670
            n = 4;
24✔
671
        }
24✔
672
        while (i < n) {
282✔
673
            lower <<= 8;
210✔
674
            lower += str[stringoffset + i++];
210✔
675
        }
210✔
676
        size_t shift = (4 - i) * 8;
72✔
677
        key_type upper = lower + 1;
72✔
678
        lower <<= shift;
72✔
679
        upper <<= shift;
72✔
680
        upper--;
72✔
681

36✔
682
        // Get index array
36✔
683
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
72✔
684

36✔
685
        // Find the position matching the key
36✔
686
        const char* offsets_header = m_alloc.translate(offsets_ref);
72✔
687
        const char* offsets_data = get_data_from_header(offsets_header);
72✔
688
        size_t offsets_size = get_size_from_header(offsets_header);
72✔
689
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, lower); // keys are always 32 bits wide
72✔
690
        // If key is outside range, we know there can be no match
36✔
691
        if (pos == offsets_size)
72✔
692
            return;
×
693

36✔
694
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
72✔
695

36✔
696
        if (NodeHeader::get_is_inner_bptree_node_from_header(header)) {
72✔
697
            bool done = false;
×
698
            while (!done) {
×
699
                // Recursively call with child node
700
                const char* header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs++)));
×
701
                _index_string_find_all_prefix(result, str, header);
×
702

703
                // Check if current node is past end of key range or last node
704
                auto key = get_direct<32>(offsets_data, pos++);
×
705
                done = key > upper || pos == offsets_size;
×
706
            }
×
707
            return;
×
708
        }
×
709

36✔
710
        size_t end = ::upper_bound<32>(offsets_data, offsets_size, upper); // keys are always 32 bits wide
72✔
711
        if (is_at_string_end) {
72✔
712
            // now get all entries from start to end
24✔
713
            for (size_t ndx = pos_refs; ndx <= end; ndx++) {
108✔
714
                uint64_t ref = get_direct(data, width, ndx);
60✔
715
                // Literal row index (tagged)
30✔
716
                if (ref & 1) {
60✔
717
                    result.emplace(int64_t(ref >> 1));
12✔
718
                }
12✔
719
                else {
48✔
720
                    get_all_keys_below(result, to_ref(ref), m_alloc);
48✔
721
                }
48✔
722
            }
60✔
723
            return;
48✔
724
        }
48✔
725

12✔
726
        // When we are not at end of string then we are comparing against the whole key
12✔
727
        // and we can have at most one match
12✔
728
        REALM_ASSERT(end == pos + 1);
24✔
729
        header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs)));
24✔
730
        stringoffset += 4;
24✔
731
    }
24✔
732
}
48✔
733

734
ObjKey IndexArray::index_string_find_first(const Mixed& value, const ClusterColumn& column) const
735
{
1,596,420✔
736
    InternalFindResult unused;
1,596,420✔
737
    return ObjKey(index_string<index_FindFirst>(value, unused, column));
1,596,420✔
738
}
1,596,420✔
739

740

741
void IndexArray::index_string_find_all(std::vector<ObjKey>& result, const Mixed& value, const ClusterColumn& column,
742
                                       bool case_insensitive) const
743
{
58,032✔
744
    if (case_insensitive && value.is_type(type_String)) {
58,032✔
745
        index_string_all_ins(value.get_string(), result, column);
6,582✔
746
    }
6,582✔
747
    else {
51,450✔
748
        index_string_all(value, result, column);
51,450✔
749
    }
51,450✔
750
}
58,032✔
751

752
FindRes IndexArray::index_string_find_all_no_copy(const Mixed& value, const ClusterColumn& column,
753
                                                  InternalFindResult& result) const
754
{
658,842✔
755
    return static_cast<FindRes>(index_string<index_FindAll_nocopy>(value, result, column));
658,842✔
756
}
658,842✔
757

758
size_t IndexArray::index_string_count(const Mixed& value, const ClusterColumn& column) const
759
{
12,864✔
760
    InternalFindResult unused;
12,864✔
761
    return to_size_t(index_string<index_Count>(value, unused, column));
12,864✔
762
}
12,864✔
763

764
std::unique_ptr<IndexArray> StringIndex::create_node(Allocator& alloc, bool is_leaf)
765
{
178,842✔
766
    Array::Type type = is_leaf ? Array::type_HasRefs : Array::type_InnerBptreeNode;
178,803✔
767
    std::unique_ptr<IndexArray> top(new IndexArray(alloc)); // Throws
178,842✔
768
    top->create(type);                                      // Throws
178,842✔
769

89,595✔
770
    // Mark that this is part of index
89,595✔
771
    // (as opposed to columns under leaves)
89,595✔
772
    top->set_context_flag(true);
178,842✔
773

89,595✔
774
    // Add subcolumns for leaves
89,595✔
775
    Array values(alloc);
178,842✔
776
    values.create(Array::type_Normal);       // Throws
178,842✔
777
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
178,842✔
778
    top->add(values.get_ref());              // first entry in refs points to offsets
178,842✔
779

89,595✔
780
    return top;
178,842✔
781
}
178,842✔
782

783
StringIndex::key_type StringIndex::get_last_key() const
784
{
116,808✔
785
    Array offsets(m_array->get_alloc());
116,808✔
786
    get_child(*m_array, 0, offsets);
116,808✔
787
    return key_type(offsets.back());
116,808✔
788
}
116,808✔
789

790

791
void StringIndex::insert_with_offset(ObjKey obj_key, StringData index_data, const Mixed& value, size_t offset)
792
{
9,222,021✔
793
    // Create 4 byte index key
4,599,273✔
794
    key_type key = create_key(index_data, offset);
9,222,021✔
795
    TreeInsert(obj_key, key, offset, index_data, value); // Throws
9,222,021✔
796
}
9,222,021✔
797

798
void StringIndex::insert_to_existing_list_at_lower(ObjKey key, Mixed value, IntegerColumn& list,
799
                                                   const IntegerColumnIterator& lower)
800
{
2,065,575✔
801
    SortedListComparator slc(m_target_column);
2,065,575✔
802
    // At this point there exists duplicates of this value, we need to
1,033,863✔
803
    // insert value beside it's duplicates so that rows are also sorted
1,033,863✔
804
    // in ascending order.
1,033,863✔
805
    IntegerColumn::const_iterator upper = std::upper_bound(lower, list.cend(), value, slc);
2,065,575✔
806
    // find insert position (the list has to be kept in sorted order)
1,033,863✔
807
    // In most cases the refs will be added to the end. So we test for that
1,033,863✔
808
    // first to see if we can avoid the binary search for insert position
1,033,863✔
809
    IntegerColumn::const_iterator last = upper - ptrdiff_t(1);
2,065,575✔
810
    int64_t last_key_value = *last;
2,065,575✔
811
    if (key.value >= last_key_value) {
2,065,575✔
812
        list.insert(upper.get_position(), key.value);
2,029,566✔
813
    }
2,029,566✔
814
    else {
36,009✔
815
        IntegerColumn::const_iterator inner_lower = std::lower_bound(lower, upper, key.value);
36,009✔
816
        list.insert(inner_lower.get_position(), key.value);
36,009✔
817
    }
36,009✔
818
}
2,065,575✔
819

820
void StringIndex::insert_to_existing_list(ObjKey key, Mixed value, IntegerColumn& list)
821
{
1,812✔
822
    SortedListComparator slc(m_target_column);
1,812✔
823
    IntegerColumn::const_iterator it_end = list.cend();
1,812✔
824
    IntegerColumn::const_iterator lower = std::lower_bound(list.cbegin(), it_end, value, slc);
1,812✔
825

861✔
826
    if (lower == it_end) {
1,812✔
827
        // Not found and everything is less, just append it to the end.
687✔
828
        list.add(key.value);
1,431✔
829
    }
1,431✔
830
    else {
381✔
831
        ObjKey lower_key = ObjKey(*lower);
381✔
832
        Mixed lower_value = get(lower_key);
381✔
833

174✔
834
        if (lower_value != value) {
381✔
835
            list.insert(lower.get_position(), key.value);
381✔
836
        }
381✔
837
        else {
×
838
            // At this point there exists duplicates of this value, we need to
839
            // insert value beside it's duplicates so that rows are also sorted
840
            // in ascending order.
841
            insert_to_existing_list_at_lower(key, value, list, lower);
×
842
        }
×
843
    }
381✔
844
}
1,812✔
845

846

847
void StringIndex::insert_row_list(size_t ref, size_t offset, StringData index_data)
848
{
2,325✔
849
    REALM_ASSERT(!m_array->is_inner_bptree_node()); // only works in leaves
2,325✔
850

1,299✔
851
    // Create 4 byte index key
1,299✔
852
    key_type key = create_key(index_data, offset);
2,325✔
853

1,299✔
854
    // Get subnode table
1,299✔
855
    Allocator& alloc = m_array->get_alloc();
2,325✔
856
    Array values(alloc);
2,325✔
857
    get_child(*m_array, 0, values);
2,325✔
858
    REALM_ASSERT(m_array->size() == values.size() + 1);
2,325✔
859

1,299✔
860
    size_t ins_pos = values.lower_bound_int(key);
2,325✔
861
    if (ins_pos == values.size()) {
2,325✔
862
        // When key is outside current range, we can just add it
1,299✔
863
        values.add(key);
2,325✔
864
        m_array->add(ref);
2,325✔
865
        return;
2,325✔
866
    }
2,325✔
867

868
#ifdef REALM_DEBUG // LCOV_EXCL_START ignore debug code
×
869
    // Since we only use this for moving existing values to new
870
    // subindexes, there should never be an existing match.
871
    key_type k = key_type(values.get(ins_pos));
×
872
    REALM_ASSERT(k != key);
×
873
#endif // LCOV_EXCL_STOP ignore debug code
×
874

875
    // If key is not present we add it at the correct location
876
    values.insert(ins_pos, key);
×
877
    m_array->insert(ins_pos + 1, ref);
×
878
}
×
879

880

881
void StringIndex::TreeInsert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value)
882
{
9,229,401✔
883
    NodeChange nc = do_insert(obj_key, key, offset, index_data, value);
9,229,401✔
884
    switch (nc.type) {
9,229,401✔
885
        case NodeChange::change_None:
9,237,270✔
886
            return;
9,237,270✔
887
        case NodeChange::change_InsertBefore: {
✔
888
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
×
889
            new_node.node_add_key(nc.ref1);
×
890
            new_node.node_add_key(get_ref());
×
891
            m_array->init_from_ref(new_node.get_ref());
×
892
            m_array->update_parent();
×
893
            return;
×
894
        }
×
895
        case NodeChange::change_InsertAfter: {
6✔
896
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
6✔
897
            new_node.node_add_key(get_ref());
6✔
898
            new_node.node_add_key(nc.ref1);
6✔
899
            m_array->init_from_ref(new_node.get_ref());
6✔
900
            m_array->update_parent();
6✔
901
            return;
6✔
902
        }
×
903
        case NodeChange::change_Split: {
63✔
904
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
63✔
905
            new_node.node_add_key(nc.ref1);
63✔
906
            new_node.node_add_key(nc.ref2);
63✔
907
            m_array->init_from_ref(new_node.get_ref());
63✔
908
            m_array->update_parent();
63✔
909
            return;
63✔
910
        }
×
911
    }
×
912
    REALM_ASSERT(false); // LCOV_EXCL_LINE; internal Realm error
×
913
}
×
914

915

916
StringIndex::NodeChange StringIndex::do_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data,
917
                                               const Mixed& value)
918
{
9,287,832✔
919
    Allocator& alloc = m_array->get_alloc();
9,287,832✔
920
    if (m_array->is_inner_bptree_node()) {
9,287,832✔
921
        // Get subnode table
20,211✔
922
        Array keys(alloc);
52,038✔
923
        get_child(*m_array, 0, keys);
52,038✔
924
        REALM_ASSERT(m_array->size() == keys.size() + 1);
52,038✔
925

20,211✔
926
        // Find the subnode containing the item
20,211✔
927
        size_t node_ndx = keys.lower_bound_int(key);
52,038✔
928
        if (node_ndx == keys.size()) {
52,038✔
929
            // node can never be empty, so try to fit in last item
6,627✔
930
            node_ndx = keys.size() - 1;
15,966✔
931
        }
15,966✔
932

20,211✔
933
        // Get sublist
20,211✔
934
        size_t refs_ndx = node_ndx + 1; // first entry in refs points to offsets
52,038✔
935
        ref_type ref = m_array->get_as_ref(refs_ndx);
52,038✔
936
        StringIndex target(ref, m_array.get(), refs_ndx, m_target_column, alloc);
52,038✔
937

20,211✔
938
        // Insert item
20,211✔
939
        NodeChange nc = target.do_insert(obj_key, key, offset, index_data, value);
52,038✔
940
        if (nc.type == NodeChange::change_None) {
52,038✔
941
            // update keys
20,199✔
942
            key_type last_key = target.get_last_key();
52,002✔
943
            keys.set(node_ndx, last_key);
52,002✔
944
            return NodeChange::change_None; // no new nodes
52,002✔
945
        }
52,002✔
946

12✔
947
        if (nc.type == NodeChange::change_InsertAfter) {
36✔
948
            ++node_ndx;
15✔
949
            ++refs_ndx;
15✔
950
        }
15✔
951

12✔
952
        // If there is room, just update node directly
12✔
953
        if (keys.size() < REALM_MAX_BPNODE_SIZE) {
36✔
954
            if (nc.type == NodeChange::change_Split) {
36✔
955
                node_insert_split(node_ndx, nc.ref2);
21✔
956
            }
21✔
957
            else {
15✔
958
                node_insert(node_ndx, nc.ref1); // ::INSERT_BEFORE/AFTER
15✔
959
            }
15✔
960
            return NodeChange::change_None;
36✔
961
        }
36✔
962

963
        // Else create new node
964
        StringIndex new_node(inner_node_tag(), alloc);
×
965
        if (nc.type == NodeChange::change_Split) {
×
966
            // update offset for left node
967
            key_type last_key = target.get_last_key();
×
968
            keys.set(node_ndx, last_key);
×
969

970
            new_node.node_add_key(nc.ref2);
×
971
            ++node_ndx;
×
972
            ++refs_ndx;
×
973
        }
×
974
        else {
×
975
            new_node.node_add_key(nc.ref1);
×
976
        }
×
977

978
        switch (node_ndx) {
×
979
            case 0: // insert before
×
980
                return NodeChange(NodeChange::change_InsertBefore, new_node.get_ref());
×
981
            case REALM_MAX_BPNODE_SIZE: // insert after
×
982
                if (nc.type == NodeChange::change_Split)
×
983
                    return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
×
984
                return NodeChange(NodeChange::change_InsertAfter, new_node.get_ref());
×
985
            default: // split
×
986
                // Move items after split to new node
987
                size_t len = m_array->size();
×
988
                for (size_t i = refs_ndx; i < len; ++i) {
×
989
                    ref_type ref_i = m_array->get_as_ref(i);
×
990
                    new_node.node_add_key(ref_i);
×
991
                }
×
992
                keys.truncate(node_ndx);
×
993
                m_array->truncate(refs_ndx);
×
994
                return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
×
995
        }
9,235,794✔
996
    }
9,235,794✔
997
    else {
9,235,794✔
998
        // Is there room in the list?
4,606,386✔
999
        Array old_keys(alloc);
9,235,794✔
1000
        get_child(*m_array, 0, old_keys);
9,235,794✔
1001
        const size_t old_offsets_size = old_keys.size();
9,235,794✔
1002
        REALM_ASSERT_EX(m_array->size() == old_offsets_size + 1, m_array->size(), old_offsets_size + 1);
9,235,794✔
1003

4,606,386✔
1004
        bool noextend = old_offsets_size >= REALM_MAX_BPNODE_SIZE;
9,235,794✔
1005

4,606,386✔
1006
        // See if we can fit entry into current leaf
4,606,386✔
1007
        // Works if there is room or it can join existing entries
4,606,386✔
1008
        if (leaf_insert(obj_key, key, offset, index_data, value, noextend))
9,235,794✔
1009
            return NodeChange::change_None;
9,237,771✔
1010

102✔
1011
        // Create new list for item (a leaf)
102✔
1012
        StringIndex new_list(m_target_column, alloc);
2,147,483,749✔
1013

102✔
1014
        new_list.leaf_insert(obj_key, key, offset, index_data, value);
2,147,483,749✔
1015

102✔
1016
        size_t ndx = old_keys.lower_bound_int(key);
2,147,483,749✔
1017

102✔
1018
        // insert before
102✔
1019
        if (ndx == 0)
2,147,483,749✔
1020
            return NodeChange(NodeChange::change_InsertBefore, new_list.get_ref());
×
1021

102✔
1022
        // insert after
102✔
1023
        if (ndx == old_offsets_size)
2,147,483,749✔
1024
            return NodeChange(NodeChange::change_InsertAfter, new_list.get_ref());
21✔
1025

93✔
1026
        // split
93✔
1027
        Array new_keys(alloc);
2,147,483,740✔
1028
        get_child(*new_list.m_array, 0, new_keys);
2,147,483,740✔
1029
        // Move items after split to new list
93✔
1030
        for (size_t i = ndx; i < old_offsets_size; ++i) {
40,377✔
1031
            int64_t v2 = old_keys.get(i);
40,284✔
1032
            int64_t v3 = m_array->get(i + 1);
40,284✔
1033

16,554✔
1034
            new_keys.add(v2);
40,284✔
1035
            new_list.m_array->add(v3);
40,284✔
1036
        }
40,284✔
1037
        old_keys.truncate(ndx);
2,147,483,740✔
1038
        m_array->truncate(ndx + 1);
2,147,483,740✔
1039

93✔
1040
        return NodeChange(NodeChange::change_Split, get_ref(), new_list.get_ref());
2,147,483,740✔
1041
    }
2,147,483,740✔
1042
}
9,287,832✔
1043

1044

1045
void StringIndex::node_insert_split(size_t ndx, size_t new_ref)
1046
{
21✔
1047
    REALM_ASSERT(m_array->is_inner_bptree_node());
21✔
1048
    REALM_ASSERT(new_ref);
21✔
1049

6✔
1050
    Allocator& alloc = m_array->get_alloc();
21✔
1051
    Array offsets(alloc);
21✔
1052
    get_child(*m_array, 0, offsets);
21✔
1053

6✔
1054
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
21✔
1055
    REALM_ASSERT(ndx < offsets.size());
21✔
1056
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
21✔
1057

6✔
1058
    // Get sublists
6✔
1059
    size_t refs_ndx = ndx + 1; // first entry in refs points to offsets
21✔
1060
    ref_type orig_ref = m_array->get_as_ref(refs_ndx);
21✔
1061
    StringIndex orig_col(orig_ref, m_array.get(), refs_ndx, m_target_column, alloc);
21✔
1062
    StringIndex new_col(new_ref, nullptr, 0, m_target_column, alloc);
21✔
1063

6✔
1064
    // Update original key
6✔
1065
    key_type last_key = orig_col.get_last_key();
21✔
1066
    offsets.set(ndx, last_key);
21✔
1067

6✔
1068
    // Insert new ref
6✔
1069
    key_type new_key = new_col.get_last_key();
21✔
1070
    offsets.insert(ndx + 1, new_key);
21✔
1071
    m_array->insert(ndx + 2, new_ref);
21✔
1072
}
21✔
1073

1074

1075
void StringIndex::node_insert(size_t ndx, size_t ref)
1076
{
15✔
1077
    REALM_ASSERT(ref);
15✔
1078
    REALM_ASSERT(m_array->is_inner_bptree_node());
15✔
1079

6✔
1080
    Allocator& alloc = m_array->get_alloc();
15✔
1081
    Array offsets(alloc);
15✔
1082
    get_child(*m_array, 0, offsets);
15✔
1083
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
15✔
1084

6✔
1085
    REALM_ASSERT(ndx <= offsets.size());
15✔
1086
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
15✔
1087

6✔
1088
    StringIndex col(ref, nullptr, 0, m_target_column, alloc);
15✔
1089
    key_type last_key = col.get_last_key();
15✔
1090

6✔
1091
    offsets.insert(ndx, last_key);
15✔
1092
    m_array->insert(ndx + 1, ref);
15✔
1093
}
15✔
1094

1095
bool StringIndex::leaf_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value,
1096
                              bool noextend)
1097
{
9,203,295✔
1098
    REALM_ASSERT(!m_array->is_inner_bptree_node());
9,203,295✔
1099

4,598,451✔
1100
    // Get subnode table
4,598,451✔
1101
    Allocator& alloc = m_array->get_alloc();
9,203,295✔
1102
    Array keys(alloc);
9,203,295✔
1103
    get_child(*m_array, 0, keys);
9,203,295✔
1104
    REALM_ASSERT(m_array->size() == keys.size() + 1);
9,203,295✔
1105

4,598,451✔
1106
    // If we are keeping the complete string in the index
4,598,451✔
1107
    // we want to know if this is the last part
4,598,451✔
1108
    bool is_at_string_end = offset + 4 >= index_data.size();
9,203,295✔
1109

4,598,451✔
1110
    size_t ins_pos = keys.lower_bound_int(key);
9,203,295✔
1111
    size_t ins_pos_refs = ins_pos + 1; // first entry in refs points to offsets
9,203,295✔
1112

4,598,451✔
1113
    if (ins_pos == keys.size()) {
9,203,295✔
1114
        if (noextend)
334,206✔
1115
            return false;
21✔
1116

165,357✔
1117
        // When key is outside current range, we can just add it
165,357✔
1118
        keys.add(key);
334,185✔
1119
        if (!m_target_column.is_fulltext() || is_at_string_end) {
334,185✔
1120
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
333,375✔
1121
            m_array->add(shifted);
333,375✔
1122
        }
333,375✔
1123
        else {
810✔
1124
            // create subindex for rest of string
408✔
1125
            StringIndex subindex(m_target_column, m_array->get_alloc());
810✔
1126
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
810✔
1127
            m_array->add(subindex.get_ref());
810✔
1128
        }
810✔
1129
        return true;
334,185✔
1130
    }
334,185✔
1131

4,433,085✔
1132
    key_type k = key_type(keys.get(ins_pos));
8,869,089✔
1133

4,433,085✔
1134
    // If key is not present we add it at the correct location
4,433,085✔
1135
    if (k != key) {
8,869,089✔
1136
        if (noextend)
720,585✔
1137
            return false;
84✔
1138

349,554✔
1139
        keys.insert(ins_pos, key);
720,501✔
1140
        if (!m_target_column.is_fulltext() || is_at_string_end) {
720,501✔
1141
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
718,836✔
1142
            m_array->insert(ins_pos_refs, shifted);
718,836✔
1143
        }
718,836✔
1144
        else {
1,665✔
1145
            // create subindex for rest of string
822✔
1146
            StringIndex subindex(m_target_column, m_array->get_alloc());
1,665✔
1147
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
1,665✔
1148
            m_array->insert(ins_pos_refs, subindex.get_ref());
1,665✔
1149
        }
1,665✔
1150
        return true;
720,501✔
1151
    }
720,501✔
1152

4,083,498✔
1153
    // This leaf already has a slot for for the key
4,083,498✔
1154

4,083,498✔
1155
    uint64_t slot_value = uint64_t(m_array->get(ins_pos_refs));
8,148,504✔
1156
    size_t suboffset = offset + s_index_key_length;
8,148,504✔
1157

4,083,498✔
1158
    // Single match (lowest bit set indicates literal row_ndx)
4,083,498✔
1159
    if ((slot_value & 1) != 0) {
8,148,504✔
1160
        ObjKey obj_key2 = ObjKey(int64_t(slot_value >> 1));
51,714✔
1161
        Mixed v2 = m_target_column.is_fulltext() ? reconstruct_string(offset, key, index_data) : get(obj_key2);
51,432✔
1162
        if (v2 == value) {
51,714✔
1163
            // Strings are equal but this is not a list.
4,860✔
1164
            // Create a list and add both rows.
4,860✔
1165

4,860✔
1166
            // convert to list (in sorted order)
4,860✔
1167
            Array row_list(alloc);
9,702✔
1168
            row_list.create(Array::type_Normal); // Throws
9,702✔
1169
            row_list.add(obj_key < obj_key2 ? obj_key.value : obj_key2.value);
9,027✔
1170
            row_list.add(obj_key < obj_key2 ? obj_key2.value : obj_key.value);
9,027✔
1171
            m_array->set(ins_pos_refs, row_list.get_ref());
9,702✔
1172
        }
9,702✔
1173
        else {
42,012✔
1174
            StringConversionBuffer buffer;
42,012✔
1175
            auto index_data_2 = v2.get_index_data(buffer);
42,012✔
1176
            if (index_data == index_data_2 || suboffset > s_max_offset) {
42,012✔
1177
                // These strings have the same prefix up to this point but we
279✔
1178
                // don't want to recurse further, create a list in sorted order.
279✔
1179
                bool row_ndx_first = value < v2;
573✔
1180
                Array row_list(alloc);
573✔
1181
                row_list.create(Array::type_Normal); // Throws
573✔
1182
                row_list.add(row_ndx_first ? obj_key.value : obj_key2.value);
447✔
1183
                row_list.add(row_ndx_first ? obj_key2.value : obj_key.value);
447✔
1184
                m_array->set(ins_pos_refs, row_list.get_ref());
573✔
1185
            }
573✔
1186
            else {
41,439✔
1187
                // These strings have the same prefix up to this point but they
21,294✔
1188
                // are actually not equal. Extend the tree recursivly until the
21,294✔
1189
                // prefix of these strings is different.
21,294✔
1190
                StringIndex subindex(m_target_column, m_array->get_alloc());
41,439✔
1191
                subindex.insert_with_offset(obj_key2, index_data_2, v2, suboffset);
41,439✔
1192
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
41,439✔
1193
                // Join the string of SubIndices to the current position of m_array
21,294✔
1194
                m_array->set(ins_pos_refs, subindex.get_ref());
41,439✔
1195
            }
41,439✔
1196
        }
42,012✔
1197
        return true;
51,714✔
1198
    }
51,714✔
1199

4,057,065✔
1200
    // If there already is a list of matches, we see if we fit there
4,057,065✔
1201
    // or it has to be split into a subindex
4,057,065✔
1202
    ref_type ref = ref_type(slot_value);
8,096,790✔
1203
    char* header = alloc.translate(ref);
8,096,790✔
1204
    if (!Array::get_context_flag_from_header(header)) {
8,096,790✔
1205
        IntegerColumn sub(alloc, ref); // Throws
2,069,091✔
1206
        sub.set_parent(m_array.get(), ins_pos_refs);
2,069,091✔
1207

1,035,843✔
1208
        IntegerColumn::const_iterator it_end = sub.cend();
2,069,091✔
1209
        IntegerColumn::const_iterator lower = it_end;
2,069,091✔
1210

1,035,843✔
1211
        auto value_exists_in_list = [&]() {
2,069,781✔
1212
            if (m_target_column.is_fulltext()) {
2,069,781✔
1213
                return reconstruct_string(offset, key, index_data) == value.get_string();
348✔
1214
            }
348✔
1215
            SortedListComparator slc(m_target_column);
2,069,433✔
1216
            lower = std::lower_bound(sub.cbegin(), it_end, value, slc);
2,069,433✔
1217

1,036,101✔
1218
            if (lower != it_end) {
2,069,433✔
1219
                Mixed lower_value = get(ObjKey(*lower));
2,066,136✔
1220
                if (lower_value == value) {
2,066,136✔
1221
                    return true;
2,065,287✔
1222
                }
2,065,287✔
1223
            }
4,146✔
1224
            return false;
4,146✔
1225
        };
4,146✔
1226

1,035,843✔
1227
        // If we found the value in this list, add the duplicate to the list.
1,035,843✔
1228
        if (value_exists_in_list()) {
2,069,091✔
1229
            insert_to_existing_list_at_lower(obj_key, value, sub, lower);
2,065,620✔
1230
        }
2,065,620✔
1231
        else {
3,471✔
1232
            // If the list only stores duplicates we are free to branch and
1,959✔
1233
            // and create a sub index with this existing list as one of the
1,959✔
1234
            // leafs, but if the list doesn't only contain duplicates we
1,959✔
1235
            // must respect that we store a common key prefix up to this
1,959✔
1236
            // point and insert into the existing list.
1,959✔
1237
            ObjKey key_of_any_dup = ObjKey(sub.get(0));
3,471✔
1238
            StringConversionBuffer buffer;
3,471✔
1239
            StringData index_data_2 = m_target_column.is_fulltext() ? reconstruct_string(offset, key, index_data)
1,962✔
1240
                                                                    : get(key_of_any_dup).get_index_data(buffer);
3,468✔
1241
            if (index_data == index_data_2 || suboffset > s_max_offset) {
4,131✔
1242
                insert_to_existing_list(obj_key, value, sub);
1,812✔
1243
            }
1,812✔
1244
            else {
1,659✔
1245
#ifdef REALM_DEBUG
1,659✔
1246
                bool contains_only_duplicates = true;
1,659✔
1247
                if (!m_target_column.is_fulltext() && sub.size() > 1) {
2,319✔
1248
                    ObjKey first_key = ObjKey(sub.get(0));
1,578✔
1249
                    ObjKey last_key = ObjKey(sub.back());
1,578✔
1250
                    auto first = get(first_key);
1,578✔
1251
                    auto last = get(last_key);
1,578✔
1252
                    // Since the list is kept in sorted order, the first and
918✔
1253
                    // last values will be the same only if the whole list is
918✔
1254
                    // storing duplicate values.
918✔
1255
                    if (first != last) {
1,578✔
1256
                        contains_only_duplicates = false; // LCOV_EXCL_LINE
×
1257
                    }
×
1258
                }
1,578✔
1259
                REALM_ASSERT_DEBUG(contains_only_duplicates);
1,659✔
1260
#endif
1,659✔
1261
                // The buffer is needed for when this is an integer index.
1,098✔
1262
                StringIndex subindex(m_target_column, m_array->get_alloc());
1,659✔
1263
                subindex.insert_row_list(sub.get_ref(), suboffset, index_data_2);
1,659✔
1264
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
1,659✔
1265
                m_array->set(ins_pos_refs, subindex.get_ref());
1,659✔
1266
            }
1,659✔
1267
        }
3,471✔
1268
        return true;
2,069,091✔
1269
    }
2,069,091✔
1270

3,021,222✔
1271
    // The key matches, but there is a subindex here so go down a level in the tree.
3,021,222✔
1272
    StringIndex subindex(ref, m_array.get(), ins_pos_refs, m_target_column, alloc);
6,027,699✔
1273
    subindex.insert_with_offset(obj_key, index_data, value, suboffset);
6,027,699✔
1274

3,021,222✔
1275
    return true;
6,027,699✔
1276
}
6,027,699✔
1277

1278
Mixed StringIndex::get(ObjKey key) const
1279
{
3,761,106✔
1280
    return m_target_column.get_value(key);
3,761,106✔
1281
}
3,761,106✔
1282

1283
void StringIndex::erase(ObjKey key)
1284
{
975,537✔
1285
    StringConversionBuffer buffer;
975,537✔
1286
    std::string_view value{(get(key).get_index_data(buffer))};
975,537✔
1287
    if (m_target_column.is_fulltext()) {
975,537✔
1288
        auto words = Tokenizer::get_instance()->reset(value).get_all_tokens();
72✔
1289
        for (auto& w : words) {
2,460✔
1290
            erase_string(key, w);
2,460✔
1291
        }
2,460✔
1292
    }
72✔
1293
    else {
975,465✔
1294
        erase_string(key, value);
975,465✔
1295
    }
975,465✔
1296
}
975,537✔
1297

1298
namespace {
1299
template <typename T>
1300
void intersect(std::vector<ObjKey>& result, T& keys)
1301
{
276✔
1302
    if (result.empty()) {
276✔
1303
        result.reserve(keys.size());
180✔
1304
        for (auto k : keys) {
498✔
1305
            result.emplace_back(k);
498✔
1306
        }
498✔
1307
    }
180✔
1308
    else {
96✔
1309
        auto it = result.begin();
96✔
1310
        auto keep = it;
96✔
1311
        auto m = keys.begin();
96✔
1312

48✔
1313
        // only keep intersection
48✔
1314
        while (it != result.end() && m != keys.end()) {
354✔
1315
            int64_t int_val = *m;
258✔
1316
            if (it->value < int_val) {
258✔
1317
                it++; // don't keep if match is not in new set
18✔
1318
            }
18✔
1319
            else if (it->value > int_val) {
240✔
1320
                ++m; // ignore new matches
102✔
1321
            }
102✔
1322
            else {
138✔
1323
                // Found both places - make sure it is kept
69✔
1324
                if (keep < it)
138✔
1325
                    *keep = *it;
6✔
1326
                ++keep;
138✔
1327
                ++it;
138✔
1328
                ++m;
138✔
1329
            }
138✔
1330
        }
258✔
1331
        if (keep != result.end()) {
96✔
1332
            result.erase(keep, result.end());
24✔
1333
        }
24✔
1334
    }
96✔
1335
}
276✔
1336

1337
struct FindResWrapper {
1338
    InternalFindResult& res;
1339
    IntegerColumn& indexes;
1340
    size_t n = 0;
1341
    size_t size()
1342
    {
144✔
1343
        return res.end_ndx - res.start_ndx;
144✔
1344
    }
144✔
1345
    auto begin()
1346
    {
228✔
1347
        return indexes.cbegin();
228✔
1348
    }
228✔
1349
    auto end()
1350
    {
384✔
1351
        return indexes.cend();
384✔
1352
    }
384✔
1353
};
1354
} // namespace
1355

1356
void StringIndex::find_all_fulltext(std::vector<ObjKey>& result, StringData value) const
1357
{
360✔
1358
    InternalFindResult res;
360✔
1359
    REALM_ASSERT(result.empty());
360✔
1360

180✔
1361
    auto tokenizer = Tokenizer::get_instance();
360✔
1362
    tokenizer->reset({value.data(), value.size()});
360✔
1363
    auto [includes, excludes] = tokenizer->get_search_tokens();
360✔
1364
    if (includes.empty()) {
360✔
1365
        if (excludes.empty()) {
24✔
1366
            throw InvalidArgument("Missing search token");
6✔
1367
        }
6✔
1368
        result = m_target_column.get_all_keys();
18✔
1369
    }
18✔
1370
    else {
336✔
1371
        for (auto& token : includes) {
414✔
1372
            if (token.back() == '*') {
414✔
1373
                std::set<int64_t> keys;
48✔
1374
                m_array->index_string_find_all_prefix(keys, StringData(token.data(), token.size() - 1));
48✔
1375
                intersect(result, keys);
48✔
1376
            }
48✔
1377
            else {
366✔
1378
                switch (find_all_no_copy(StringData{token}, res)) {
366✔
1379
                    case FindRes_not_found:
12✔
1380
                        result.clear();
12✔
1381
                        break;
12✔
1382
                    case FindRes_column: {
228✔
1383
                        IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
228✔
1384
                        FindResWrapper wrapper{res, indexes};
228✔
1385
                        intersect(result, wrapper);
228✔
1386
                        break;
228✔
1387
                    }
×
1388
                    case FindRes_single:
126✔
1389
                        // merge in single res
63✔
1390
                        if (result.empty()) {
126✔
1391
                            result.emplace_back(res.payload);
96✔
1392
                        }
96✔
1393
                        else {
30✔
1394
                            ObjKey key(res.payload);
30✔
1395
                            auto pos = std::lower_bound(result.begin(), result.end(), key);
30✔
1396
                            if (pos != result.end() && key == *pos) {
30✔
1397
                                result.clear();
30✔
1398
                                result.push_back(key);
30✔
1399
                            }
30✔
1400
                            else {
×
1401
                                result.clear();
×
1402
                            }
×
1403
                        }
30✔
1404
                        break;
126✔
1405
                }
414✔
1406
            }
414✔
1407
            if (result.empty())
414✔
1408
                return;
18✔
1409
        }
414✔
1410
    }
336✔
1411

180✔
1412
    for (auto& token : excludes) {
348✔
1413
        if (token.back() == '*') {
96✔
1414
            throw IllegalOperation("Exclude by prefix is not implemented");
×
1415
        }
×
1416
        if (result.empty())
96✔
1417
            return;
×
1418

48✔
1419
        switch (find_all_no_copy(StringData{token}, res)) {
96✔
1420
            case FindRes_not_found:
✔
1421
                // Nothing to exclude
1422
                break;
×
1423
            case FindRes_column: {
60✔
1424
                IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
60✔
1425

30✔
1426
                auto it = result.begin();
60✔
1427
                auto keep = it;
60✔
1428
                size_t m = res.start_ndx;
60✔
1429
                auto idx_val = indexes.get(m);
60✔
1430

30✔
1431
                while (it != result.end()) {
390✔
1432
                    if (it->value < idx_val) {
330✔
1433
                        // Not found in excludes
78✔
1434
                        if (keep < it)
156✔
1435
                            *keep = *it;
156✔
1436
                        ++keep;
156✔
1437
                        ++it;
156✔
1438
                    }
156✔
1439
                    else {
174✔
1440
                        if (it->value == idx_val) {
174✔
1441
                            // found in excludes - don't keep
75✔
1442
                            ++it;
150✔
1443
                        }
150✔
1444
                        ++m;
174✔
1445
                        idx_val = m < res.end_ndx ? indexes.get(m) : std::numeric_limits<int64_t>::max();
153✔
1446
                    }
174✔
1447
                }
330✔
1448
                if (keep != result.end()) {
60✔
1449
                    result.erase(keep, result.end());
60✔
1450
                }
60✔
1451
                break;
60✔
1452
            }
×
1453
            case FindRes_single: {
36✔
1454
                // exclude single res
18✔
1455
                ObjKey key(res.payload);
36✔
1456
                auto pos = std::lower_bound(result.begin(), result.end(), key);
36✔
1457
                if (pos != result.end() && key == *pos) {
36✔
1458
                    result.erase(pos);
36✔
1459
                }
36✔
1460
                break;
36✔
1461
            }
×
1462
        }
96✔
1463
    }
96✔
1464
}
336✔
1465

1466

1467
void StringIndex::clear()
1468
{
2,262✔
1469
    Array values(m_array->get_alloc());
2,262✔
1470
    get_child(*m_array, 0, values);
2,262✔
1471
    REALM_ASSERT(m_array->size() == values.size() + 1);
2,262✔
1472

474✔
1473
    values.clear();
2,262✔
1474
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
2,262✔
1475

474✔
1476
    size_t size = 1;
2,262✔
1477
    m_array->truncate_and_destroy_children(size); // Don't touch `values` array
2,262✔
1478

474✔
1479
    m_array->set_type(Array::type_HasRefs);
2,262✔
1480
}
2,262✔
1481

1482

1483
void StringIndex::do_delete(ObjKey obj_key, StringData index_data, size_t offset)
1484
{
1,282,968✔
1485
    Allocator& alloc = m_array->get_alloc();
1,282,968✔
1486
    Array values(alloc);
1,282,968✔
1487
    get_child(*m_array, 0, values);
1,282,968✔
1488
    REALM_ASSERT(m_array->size() == values.size() + 1);
1,282,968✔
1489

641,307✔
1490
    // Create 4 byte index key
641,307✔
1491
    key_type key = create_key(index_data, offset);
1,282,968✔
1492

641,307✔
1493
    const size_t pos = values.lower_bound_int(key);
1,282,968✔
1494
    const size_t pos_refs = pos + 1; // first entry in refs points to offsets
1,282,968✔
1495
    REALM_ASSERT(pos != values.size());
1,282,968✔
1496

641,307✔
1497
    if (m_array->is_inner_bptree_node()) {
1,282,968✔
1498
        ref_type ref = m_array->get_as_ref(pos_refs);
64,857✔
1499
        StringIndex node(ref, m_array.get(), pos_refs, m_target_column, alloc);
64,857✔
1500
        node.do_delete(obj_key, index_data, offset);
64,857✔
1501

32,442✔
1502
        // Update the ref
32,442✔
1503
        if (node.is_empty()) {
64,857✔
1504
            values.erase(pos);
108✔
1505
            m_array->erase(pos_refs);
108✔
1506
            node.destroy();
108✔
1507
        }
108✔
1508
        else {
64,749✔
1509
            key_type max_val = node.get_last_key();
64,749✔
1510
            if (max_val != key_type(values.get(pos)))
64,749✔
1511
                values.set(pos, max_val);
108✔
1512
        }
64,749✔
1513
    }
64,857✔
1514
    else {
1,218,111✔
1515
        uint64_t ref = m_array->get(pos_refs);
1,218,111✔
1516
        if (ref & 1) {
1,218,111✔
1517
            REALM_ASSERT(int64_t(ref >> 1) == obj_key.value);
629,634✔
1518
            values.erase(pos);
629,634✔
1519
            m_array->erase(pos_refs);
629,634✔
1520
        }
629,634✔
1521
        else {
588,477✔
1522
            // A real ref either points to a list or a subindex
295,578✔
1523
            char* header = alloc.translate(ref_type(ref));
588,477✔
1524
            if (Array::get_context_flag_from_header(header)) {
588,477✔
1525
                StringIndex subindex(ref_type(ref), m_array.get(), pos_refs, m_target_column, alloc);
240,111✔
1526
                subindex.do_delete(obj_key, index_data, offset + s_index_key_length);
240,111✔
1527

120,489✔
1528
                if (subindex.is_empty()) {
240,111✔
1529
                    values.erase(pos);
13,146✔
1530
                    m_array->erase(pos_refs);
13,146✔
1531
                    subindex.destroy();
13,146✔
1532
                }
13,146✔
1533
            }
240,111✔
1534
            else {
348,366✔
1535
                IntegerColumn sub(alloc, ref_type(ref)); // Throws
348,366✔
1536
                sub.set_parent(m_array.get(), pos_refs);
348,366✔
1537
                size_t r = sub.find_first(obj_key.value);
348,366✔
1538
                size_t sub_size = sub.size(); // Slow
348,366✔
1539
                REALM_ASSERT_EX(r != sub_size, r, sub_size);
348,366✔
1540
                sub.erase(r);
348,366✔
1541

175,089✔
1542
                if (sub_size == 1) {
348,366✔
1543
                    values.erase(pos);
3,336✔
1544
                    m_array->erase(pos_refs);
3,336✔
1545
                    sub.destroy();
3,336✔
1546
                }
3,336✔
1547
            }
348,366✔
1548
        }
588,477✔
1549
    }
1,218,111✔
1550
}
1,282,968✔
1551

1552
void StringIndex::erase_string(ObjKey key, StringData value)
1553
{
978,120✔
1554
    do_delete(key, value, 0);
978,120✔
1555

488,424✔
1556
    // Collapse top nodes with single item
488,424✔
1557
    while (m_array->is_inner_bptree_node()) {
978,126✔
1558
        REALM_ASSERT(m_array->size() > 1); // node cannot be empty
64,857✔
1559
        if (m_array->size() > 2)
64,857✔
1560
            break;
64,851✔
1561

3✔
1562
        ref_type ref = m_array->get_as_ref(1);
6✔
1563
        m_array->set(1, 1); // avoid destruction of the extracted ref
6✔
1564
        m_array->destroy_deep();
6✔
1565
        m_array->init_from_ref(ref);
6✔
1566
        m_array->update_parent();
6✔
1567
    }
6✔
1568
}
978,120✔
1569

1570
namespace {
1571

1572
bool has_duplicate_values(const Array& node, const ClusterColumn& target_col) noexcept
1573
{
6,330✔
1574
    Allocator& alloc = node.get_alloc();
6,330✔
1575
    Array child(alloc);
6,330✔
1576
    size_t n = node.size();
6,330✔
1577
    REALM_ASSERT(n >= 1);
6,330✔
1578
    if (node.is_inner_bptree_node()) {
6,330✔
1579
        // Inner node
1580
        for (size_t i = 1; i < n; ++i) {
×
1581
            ref_type ref = node.get_as_ref(i);
×
1582
            child.init_from_ref(ref);
×
1583
            if (has_duplicate_values(child, target_col))
×
1584
                return true;
×
1585
        }
×
1586
        return false;
×
1587
    }
6,330✔
1588

3,165✔
1589
    // Leaf node
3,165✔
1590
    for (size_t i = 1; i < n; ++i) {
32,814✔
1591
        int_fast64_t value = node.get(i);
28,740✔
1592
        bool is_single_row_index = (value & 1) != 0;
28,740✔
1593
        if (is_single_row_index)
28,740✔
1594
            continue;
23,280✔
1595

2,730✔
1596
        ref_type ref = to_ref(value);
5,460✔
1597
        child.init_from_ref(ref);
5,460✔
1598

2,730✔
1599
        bool is_subindex = child.get_context_flag();
5,460✔
1600
        if (is_subindex) {
5,460✔
1601
            if (has_duplicate_values(child, target_col))
4,872✔
1602
                return true;
1,800✔
1603
            continue;
3,072✔
1604
        }
3,072✔
1605

294✔
1606
        // Child is root of B+-tree of row indexes
294✔
1607
        IntegerColumn sub(alloc, ref);
588✔
1608
        if (sub.size() > 1) {
588✔
1609
            ObjKey first_key = ObjKey(sub.get(0));
480✔
1610
            ObjKey last_key = ObjKey(sub.back());
480✔
1611
            Mixed first = target_col.get_value(first_key);
480✔
1612
            Mixed last = target_col.get_value(last_key);
480✔
1613
            // Since the list is kept in sorted order, the first and
240✔
1614
            // last values will be the same only if the whole list is
240✔
1615
            // storing duplicate values.
240✔
1616
            if (first == last) {
480✔
1617
                return true;
432✔
1618
            }
432✔
1619
            // There may also be several short lists combined, so we need to
24✔
1620
            // check each of these individually for duplicates.
24✔
1621
            IntegerColumn::const_iterator it = sub.cbegin();
48✔
1622
            IntegerColumn::const_iterator it_end = sub.cend();
48✔
1623
            SortedListComparator slc(target_col);
48✔
1624
            while (it != it_end) {
144✔
1625
                Mixed it_data = target_col.get_value(ObjKey(*it));
120✔
1626
                IntegerColumn::const_iterator next = std::upper_bound(it, it_end, it_data, slc);
120✔
1627
                size_t count_of_value = next - it; // row index subtraction in `sub`
120✔
1628
                if (count_of_value > 1) {
120✔
1629
                    return true;
24✔
1630
                }
24✔
1631
                it = next;
96✔
1632
            }
96✔
1633
        }
48✔
1634
    }
588✔
1635

3,165✔
1636
    return false;
5,202✔
1637
}
6,330✔
1638

1639
} // anonymous namespace
1640

1641

1642
bool StringIndex::has_duplicate_values() const noexcept
1643
{
1,458✔
1644
    return ::has_duplicate_values(*m_array, m_target_column);
1,458✔
1645
}
1,458✔
1646

1647

1648
bool StringIndex::is_empty() const
1649
{
305,133✔
1650
    return m_array->size() == 1; // first entry in refs points to offsets
305,133✔
1651
}
305,133✔
1652

1653
void StringIndex::insert(ObjKey key, const Mixed& value)
1654
{
2,450,409✔
1655
    StringConversionBuffer buffer;
2,450,409✔
1656
    constexpr size_t offset = 0; // First key from beginning of string
2,450,409✔
1657

1,213,146✔
1658
    if (this->m_target_column.is_fulltext()) {
2,450,409✔
1659
        if (value.is_type(type_String)) {
198✔
1660
            auto words = Tokenizer::get_instance()->reset(std::string_view(value.get<StringData>())).get_all_tokens();
198✔
1661
            for (auto& word : words) {
936✔
1662
                Mixed m(word);
936✔
1663
                insert_with_offset(key, m.get_index_data(buffer), m, offset); // Throws
936✔
1664
            }
936✔
1665
        }
198✔
1666
    }
198✔
1667
    else {
2,450,211✔
1668
        insert_with_offset(key, value.get_index_data(buffer), value, offset); // Throws
2,450,211✔
1669
    }
2,450,211✔
1670
}
2,450,409✔
1671

1672
void StringIndex::set(ObjKey key, const Mixed& new_value)
1673
{
663,465✔
1674
    StringConversionBuffer buffer;
663,465✔
1675
    Mixed old_value = get(key);
663,465✔
1676

331,836✔
1677
    if (this->m_target_column.is_fulltext()) {
663,465✔
1678
        auto tokenizer = Tokenizer::get_instance();
168✔
1679
        StringData old_string = old_value.get_index_data(buffer);
168✔
1680
        std::set<std::string> old_words;
168✔
1681

84✔
1682
        if (old_string.size() > 0) {
168✔
1683
            tokenizer->reset({old_string.data(), old_string.size()});
6✔
1684
            old_words = tokenizer->get_all_tokens();
6✔
1685
        }
6✔
1686
        std::set<std::string> new_words;
168✔
1687
        if (new_value.is_type(type_String)) {
168✔
1688
            new_words = tokenizer->reset(std::string_view(new_value.get<StringData>())).get_all_tokens();
168✔
1689
        }
168✔
1690

84✔
1691
        auto w1 = old_words.begin();
168✔
1692
        auto w2 = new_words.begin();
168✔
1693

84✔
1694
        // Do a diff, deleting words no longer present and
84✔
1695
        // inserting new words
84✔
1696
        while (w1 != old_words.end() && w2 != new_words.end()) {
540✔
1697
            if (*w1 < *w2) {
372✔
1698
                erase_string(key, *w1);
126✔
1699
                ++w1;
126✔
1700
            }
126✔
1701
            else if (*w2 < *w1) {
246✔
1702
                Mixed m(*w2);
198✔
1703
                insert_with_offset(key, m.get_index_data(buffer), m, 0);
198✔
1704
                ++w2;
198✔
1705
            }
198✔
1706
            else {
48✔
1707
                ++w1;
48✔
1708
                ++w2;
48✔
1709
            }
48✔
1710
        }
372✔
1711
        while (w1 != old_words.end()) {
168✔
1712
            erase_string(key, *w1);
×
1713
            ++w1;
×
1714
        }
×
1715
        while (w2 != new_words.end()) {
3,354✔
1716
            Mixed m(*w2);
3,186✔
1717
            insert_with_offset(key, m.get_index_data(buffer), m, 0);
3,186✔
1718

1,593✔
1719
            ++w2;
3,186✔
1720
        }
3,186✔
1721
    }
168✔
1722
    else {
663,297✔
1723
        if (REALM_LIKELY(new_value != old_value)) {
663,297✔
1724
            // We must erase this row first because erase uses find_first which
317,085✔
1725
            // might find the duplicate if we insert before erasing.
317,085✔
1726
            erase(key); // Throws
633,843✔
1727

317,085✔
1728
            auto index_data = new_value.get_index_data(buffer);
633,843✔
1729
            insert_with_offset(key, index_data, new_value, 0); // Throws
633,843✔
1730
        }
633,843✔
1731
    }
663,297✔
1732
}
663,465✔
1733

1734
void StringIndex::node_add_key(ref_type ref)
1735
{
138✔
1736
    REALM_ASSERT(ref);
138✔
1737
    REALM_ASSERT(m_array->is_inner_bptree_node());
138✔
1738

60✔
1739
    Allocator& alloc = m_array->get_alloc();
138✔
1740
    Array offsets(alloc);
138✔
1741
    get_child(*m_array, 0, offsets);
138✔
1742
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
138✔
1743
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE + 1);
138✔
1744

60✔
1745
    Array new_top(alloc);
138✔
1746
    Array new_offsets(alloc);
138✔
1747
    new_top.init_from_ref(ref);
138✔
1748
    new_offsets.init_from_ref(new_top.get_as_ref(0));
138✔
1749
    REALM_ASSERT(!new_offsets.is_empty());
138✔
1750

60✔
1751
    int64_t key = new_offsets.back();
138✔
1752
    offsets.add(key);
138✔
1753
    m_array->add(ref);
138✔
1754
}
138✔
1755

1756
// Must return true if value of object(key) is less than 'b'.
1757
bool SortedListComparator::operator()(int64_t key_value, const Mixed& b) // used in lower_bound
1758
{
20,350,680✔
1759
    Mixed a = m_column.get_value(ObjKey(key_value));
20,350,680✔
1760
    if (a.is_null() && !b.is_null())
20,350,680✔
1761
        return true;
714✔
1762
    else if (b.is_null() && !a.is_null())
20,349,966✔
1763
        return false;
6✔
1764
    else if (a.is_null() && b.is_null())
20,349,960✔
1765
        return false;
348,915✔
1766
    int cmp = a.compare(b);
20,001,045✔
1767
    return cmp == 0 ? false : cmp < 0;
4,294,967,294✔
1768
}
20,001,045✔
1769

1770

1771
// Must return true if value of 'a' is less than value of object(key).
1772
bool SortedListComparator::operator()(const Mixed& a, int64_t key_value) // used in upper_bound
1773
{
18,260,313✔
1774
    Mixed b = m_column.get_value(ObjKey(key_value));
18,260,313✔
1775
    if (a.is_null() && !b.is_null())
18,260,313✔
NEW
1776
        return true;
×
1777
    else if (b.is_null() && !a.is_null())
18,260,313✔
UNCOV
1778
        return false;
×
1779
    else if (a.is_null() && b.is_null())
18,260,313✔
1780
        return false;
296,661✔
1781
    int cmp = a.compare(b);
17,963,652✔
1782
    return cmp == 0 ? false : cmp < 0;
4,294,967,294✔
1783
}
17,963,652✔
1784

1785
// LCOV_EXCL_START ignore debug functions
1786

1787
#ifdef REALM_DEBUG
1788
void StringIndex::print() const
1789
{
×
1790
    dump_node_structure(*m_array, std::cout, 0);
×
1791
}
×
1792
#endif // REALM_DEBUG
1793

1794
void StringIndex::verify() const
1795
{
1,320✔
1796
#ifdef REALM_DEBUG
1,320✔
1797
    m_array->verify();
1,320✔
1798

660✔
1799
    Allocator& alloc = m_array->get_alloc();
1,320✔
1800
    const size_t array_size = m_array->size();
1,320✔
1801

660✔
1802
    // Get first matching row for every key
660✔
1803
    if (m_array->is_inner_bptree_node()) {
1,320✔
1804
        for (size_t i = 1; i < array_size; ++i) {
×
1805
            size_t ref = m_array->get_as_ref(i);
×
1806
            StringIndex ndx(ref, nullptr, 0, m_target_column, alloc);
×
1807
            ndx.verify();
×
1808
        }
×
1809
    }
×
1810
    else {
1,320✔
1811
        size_t column_size = m_target_column.size();
1,320✔
1812
        for (size_t i = 1; i < array_size; ++i) {
2,736✔
1813
            int64_t ref = m_array->get(i);
1,416✔
1814

708✔
1815
            // low bit set indicate literal ref (shifted)
708✔
1816
            if (ref & 1) {
1,416✔
1817
                size_t r = to_size_t((uint64_t(ref) >> 1));
96✔
1818
                REALM_ASSERT_EX(r < column_size, r, column_size);
96✔
1819
            }
96✔
1820
            else {
1,320✔
1821
                // A real ref either points to a list or a subindex
660✔
1822
                char* header = alloc.translate(to_ref(ref));
1,320✔
1823
                if (Array::get_context_flag_from_header(header)) {
1,320✔
1824
                    StringIndex ndx(to_ref(ref), m_array.get(), i, m_target_column, alloc);
1,296✔
1825
                    ndx.verify();
1,296✔
1826
                }
1,296✔
1827
                else {
24✔
1828
                    IntegerColumn sub(alloc, to_ref(ref)); // Throws
24✔
1829
                    IntegerColumn::const_iterator it = sub.cbegin();
24✔
1830
                    IntegerColumn::const_iterator it_end = sub.cend();
24✔
1831
                    SortedListComparator slc(m_target_column);
24✔
1832
                    Mixed previous = get(ObjKey(*it));
24✔
1833
                    size_t last_row = to_size_t(*it);
24✔
1834

12✔
1835
                    // Check that strings listed in sub are in sorted order
12✔
1836
                    // and if there are duplicates, that the row numbers are
12✔
1837
                    // sorted in the group of duplicates.
12✔
1838
                    while (it != it_end) {
96✔
1839
                        Mixed it_data = get(ObjKey(*it));
72✔
1840
                        size_t it_row = to_size_t(*it);
72✔
1841
                        REALM_ASSERT(previous <= it_data);
72✔
1842
                        if (it != sub.cbegin() && previous == it_data) {
72✔
1843
                            REALM_ASSERT_EX(it_row > last_row, it_row, last_row);
×
1844
                        }
×
1845
                        last_row = it_row;
72✔
1846
                        previous = get(ObjKey(*it));
72✔
1847
                        ++it;
72✔
1848
                    }
72✔
1849
                }
24✔
1850
            }
1,320✔
1851
        }
1,416✔
1852
    }
1,320✔
1853
// FIXME: Extend verification along the lines of IntegerColumn::verify().
660✔
1854
#endif
1,320✔
1855
}
1,320✔
1856

1857
#ifdef REALM_DEBUG
1858

1859
template <class T>
1860
void StringIndex::verify_entries(const ClusterColumn& column) const
1861
{
1862
    std::vector<ObjKey> results;
1863

1864
    auto it = column.begin();
1865
    auto end = column.end();
1866
    auto col = column.get_column_key();
1867
    while (it != end) {
1868
        ObjKey key = it->get_key();
1869
        T value = it->get<T>(col);
1870

1871
        find_all(results, value);
1872

1873
        auto ndx = find(results.begin(), results.end(), key);
1874
        REALM_ASSERT(ndx != results.end());
1875
        size_t found = count(value);
1876
        REALM_ASSERT_EX(found >= 1, found);
1877
        results.clear();
1878
    }
1879
}
1880

1881
namespace {
1882

1883
namespace {
1884

1885
bool is_chars(uint64_t val)
1886
{
×
1887
    if (val == 0)
×
1888
        return true;
×
1889
    if (is_chars(val >> 8)) {
×
1890
        char c = val & 0xFF;
×
1891
        if (!c || std::isalpha(c)) {
×
1892
            return true;
×
1893
        }
×
1894
    }
×
1895
    return false;
×
1896
}
×
1897

1898
void out_char(std::ostream& out, uint64_t val)
1899
{
×
1900
    if (val) {
×
1901
        out_char(out, val >> 8);
×
1902
        char c = val & 0xFF;
×
1903
        if (c && c != 'X') {
×
1904
            out << c;
×
1905
        }
×
1906
    }
×
1907
}
×
1908

1909
void out_hex(std::ostream& out, uint64_t val)
1910
{
×
1911
    if (is_chars(val)) {
×
1912
        out_char(out, val);
×
1913
    }
×
1914
    else {
×
1915
        out << int(val);
×
1916
    }
×
1917
}
×
1918

1919
} // namespace
1920

1921
} // namespace
1922

1923
void StringIndex::dump_node_structure(const Array& node, std::ostream& out, int level)
1924
{
×
1925
    int indent = level * 2;
×
1926
    Allocator& alloc = node.get_alloc();
×
1927
    Array subnode(alloc);
×
1928

1929
    size_t node_size = node.size();
×
1930
    REALM_ASSERT(node_size >= 1);
×
1931

1932
    out << std::hex;
×
1933

1934
    bool node_is_leaf = !node.is_inner_bptree_node();
×
1935
    if (node_is_leaf) {
×
1936
        out << std::setw(indent) << ""
×
1937
            << "Leaf (B+ tree) (ref: " << node.get_ref() << ")\n";
×
1938
    }
×
1939
    else {
×
1940
        out << std::setw(indent) << ""
×
1941
            << "Inner node (B+ tree) (ref: " << node.get_ref() << ")\n";
×
1942
    }
×
1943

1944
    subnode.init_from_ref(to_ref(node.front()));
×
1945
    out << std::setw(indent) << ""
×
1946
        << "  Keys (keys_ref: " << subnode.get_ref() << ", ";
×
1947
    if (subnode.is_empty()) {
×
1948
        out << "no keys";
×
1949
    }
×
1950
    else {
×
1951
        out << "keys: ";
×
1952
        for (size_t i = 0; i != subnode.size(); ++i) {
×
1953
            if (i != 0)
×
1954
                out << ", ";
×
1955
            out_hex(out, uint32_t(subnode.get(i)));
×
1956
        }
×
1957
    }
×
1958
    out << ")\n";
×
1959

1960
    if (node_is_leaf) {
×
1961
        for (size_t i = 1; i != node_size; ++i) {
×
1962
            int_fast64_t value = node.get(i);
×
1963
            bool is_single_row_index = (value & 1) != 0;
×
1964
            if (is_single_row_index) {
×
1965
                out << std::setw(indent) << ""
×
1966
                    << "  Single row index (value: " << (value / 2) << ")\n";
×
1967
                continue;
×
1968
            }
×
1969
            subnode.init_from_ref(to_ref(value));
×
1970
            bool is_subindex = subnode.get_context_flag();
×
1971
            if (is_subindex) {
×
1972
                out << std::setw(indent) << ""
×
1973
                    << "  Subindex\n";
×
1974
                dump_node_structure(subnode, out, level + 2);
×
1975
                continue;
×
1976
            }
×
1977
            IntegerColumn indexes(alloc, to_ref(value));
×
1978
            out << std::setw(indent) << ""
×
1979
                << "  List of row indexes\n";
×
1980
            indexes.dump_values(out, level + 2);
×
1981
        }
×
1982
        return;
×
1983
    }
×
1984

1985

1986
    size_t num_children = node_size - 1;
×
1987
    size_t child_ref_begin = 1;
×
1988
    size_t child_ref_end = 1 + num_children;
×
1989
    for (size_t i = child_ref_begin; i != child_ref_end; ++i) {
×
1990
        subnode.init_from_ref(node.get_as_ref(i));
×
1991
        dump_node_structure(subnode, out, level + 1);
×
1992
    }
×
1993
}
×
1994

1995

1996
void StringIndex::do_dump_node_structure(std::ostream& out, int level) const
1997
{
×
1998
    dump_node_structure(*m_array, out, level);
×
1999
}
×
2000

2001
#endif // LCOV_EXCL_STOP ignore debug functions
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc