• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / github_pull_request_275914

25 Sep 2023 03:10PM UTC coverage: 92.915% (+1.7%) from 91.215%
github_pull_request_275914

Pull #6073

Evergreen

jedelbo
Merge tag 'v13.21.0' into next-major

"Feature/Bugfix release"
Pull Request #6073: Merge next-major

96928 of 177706 branches covered (0.0%)

8324 of 8714 new or added lines in 122 files covered. (95.52%)

181 existing lines in 28 files now uncovered.

247505 of 266379 relevant lines covered (92.91%)

7164945.17 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.47
/src/realm/index_string.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include <cstdio>
20
#include <iomanip>
21
#include <list>
22

23
#ifdef REALM_DEBUG
24
#include <iostream>
25
#endif
26

27
#include <realm/exceptions.hpp>
28
#include <realm/index_string.hpp>
29
#include <realm/table.hpp>
30
#include <realm/timestamp.hpp>
31
#include <realm/column_integer.hpp>
32
#include <realm/unicode.hpp>
33
#include <realm/tokenizer.hpp>
34

35
using namespace realm;
36
using namespace realm::util;
37

38
namespace {
39

40
void get_child(Array& parent, size_t child_ref_ndx, Array& child) noexcept
41
{
19,880,025✔
42
    ref_type child_ref = parent.get_as_ref(child_ref_ndx);
19,880,025✔
43
    child.init_from_ref(child_ref);
19,880,025✔
44
    child.set_parent(&parent, child_ref_ndx);
19,880,025✔
45
}
19,880,025✔
46

47
// This method reconstructs the string inserted in the search index based on a string
48
// that matches so far and the last key (only works if complete strings are stored in the index)
49
static StringData reconstruct_string(size_t offset, StringIndex::key_type key, StringData new_string)
50
{
1,086✔
51
    if (key == 0)
1,086✔
52
        return StringData();
×
53

543✔
54
    size_t rest_len = 4;
1,086✔
55
    char* k = reinterpret_cast<char*>(&key);
1,086✔
56
    if (k[0] == 'X')
1,086✔
57
        rest_len = 3;
234✔
58
    else if (k[1] == 'X')
852✔
59
        rest_len = 2;
360✔
60
    else if (k[2] == 'X')
492✔
61
        rest_len = 1;
228✔
62
    else if (k[3] == 'X')
264✔
63
        rest_len = 0;
36✔
64

543✔
65
    REALM_ASSERT(offset + rest_len <= new_string.size());
1,086✔
66

543✔
67
    return StringData(new_string.data(), offset + rest_len);
1,086✔
68
}
1,086✔
69

70

71
} // anonymous namespace
72

73
DataType ClusterColumn::get_data_type() const
74
{
×
75
    const Table* table = m_cluster_tree->get_owning_table();
×
76
    return table->get_column_type(m_column_key);
×
77
}
×
78

79
Mixed ClusterColumn::get_value(ObjKey key) const
80
{
43,612,071✔
81
    const Obj obj{m_cluster_tree->get(key)};
43,612,071✔
82
    return obj.get_any(m_column_key);
43,612,071✔
83
}
43,612,071✔
84

85
std::vector<ObjKey> ClusterColumn::get_all_keys() const
86
{
18✔
87
    std::vector<ObjKey> ret;
18✔
88
    ret.reserve(m_cluster_tree->size());
18✔
89
    m_cluster_tree->traverse([&ret](const Cluster* cluster) {
18✔
90
        auto sz = cluster->node_size();
18✔
91
        for (size_t i = 0; i < sz; i++) {
198✔
92
            ret.push_back(cluster->get_real_key(i));
180✔
93
        }
180✔
94

9✔
95
        return IteratorControl::AdvanceToNext;
18✔
96
    });
18✔
97
    return ret;
18✔
98
}
18✔
99

100
template <>
101
int64_t IndexArray::from_list<index_FindFirst>(Mixed value, InternalFindResult& /* result_ref */,
102
                                               const IntegerColumn& key_values, const ClusterColumn& column) const
103
{
5,355✔
104
    SortedListComparator slc(column);
5,355✔
105

2,745✔
106
    IntegerColumn::const_iterator it_end = key_values.cend();
5,355✔
107
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
5,355✔
108
    if (lower == it_end)
5,355✔
109
        return null_key.value;
×
110

2,745✔
111
    int64_t first_key_value = *lower;
5,355✔
112

2,745✔
113
    Mixed actual = column.get_value(ObjKey(first_key_value));
5,355✔
114
    if (actual != value)
5,355✔
115
        return null_key.value;
×
116

2,745✔
117
    return first_key_value;
5,355✔
118
}
5,355✔
119

120
template <>
121
int64_t IndexArray::from_list<index_Count>(Mixed value, InternalFindResult& /* result_ref */,
122
                                           const IntegerColumn& key_values, const ClusterColumn& column) const
123
{
8,079✔
124
    SortedListComparator slc(column);
8,079✔
125

4,041✔
126
    IntegerColumn::const_iterator it_end = key_values.cend();
8,079✔
127
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
8,079✔
128
    if (lower == it_end)
8,079✔
129
        return 0;
24✔
130

4,029✔
131
    int64_t first_key_value = *lower;
8,055✔
132

4,029✔
133
    Mixed actual = column.get_value(ObjKey(first_key_value));
8,055✔
134
    if (actual != value)
8,055✔
135
        return 0;
24✔
136

4,017✔
137
    IntegerColumn::const_iterator upper = std::upper_bound(lower, it_end, value, slc);
8,031✔
138
    int64_t cnt = upper - lower;
8,031✔
139

4,017✔
140
    return cnt;
8,031✔
141
}
8,031✔
142

143
template <>
144
int64_t IndexArray::from_list<index_FindAll_nocopy>(Mixed value, InternalFindResult& result_ref,
145
                                                    const IntegerColumn& key_values,
146
                                                    const ClusterColumn& column) const
147
{
2,133✔
148
    SortedListComparator slc(column);
2,133✔
149
    IntegerColumn::const_iterator it_end = key_values.cend();
2,133✔
150
    IntegerColumn::const_iterator lower = std::lower_bound(key_values.cbegin(), it_end, value, slc);
2,133✔
151
    if (lower == it_end)
2,133✔
152
        return size_t(FindRes_not_found);
48✔
153

1,056✔
154
    ObjKey first_key = ObjKey(*lower);
2,085✔
155

1,056✔
156
    Mixed actual = column.get_value(ObjKey(first_key));
2,085✔
157
    if (actual != value)
2,085✔
158
        return size_t(FindRes_not_found);
63✔
159

1,017✔
160
    // Optimization: check the last entry before trying upper bound.
1,017✔
161
    IntegerColumn::const_iterator upper = it_end;
2,022✔
162
    --upper;
2,022✔
163
    // Single result if upper matches lower
1,017✔
164
    if (upper == lower) {
2,022✔
165
        result_ref.payload = *lower;
315✔
166
        return size_t(FindRes_single);
315✔
167
    }
315✔
168

858✔
169
    // Check string value at upper, if equal return matches in (lower, upper]
858✔
170
    ObjKey last_key = ObjKey(*upper);
1,707✔
171
    actual = column.get_value(ObjKey(last_key));
1,707✔
172
    if (actual == value) {
1,707✔
173
        result_ref.payload = from_ref(key_values.get_ref());
1,242✔
174
        result_ref.start_ndx = lower.get_position();
1,242✔
175
        result_ref.end_ndx = upper.get_position() + 1; // one past last match
1,242✔
176
        return size_t(FindRes_column);
1,242✔
177
    }
1,242✔
178

252✔
179
    // Last result is not equal, find the upper bound of the range of results.
252✔
180
    // Note that we are passing upper which is cend() - 1 here as we already
252✔
181
    // checked the last item manually.
252✔
182
    upper = std::upper_bound(lower, upper, value, slc);
465✔
183

252✔
184
    result_ref.payload = from_ref(key_values.get_ref());
465✔
185
    result_ref.start_ndx = lower.get_position();
465✔
186
    result_ref.end_ndx = upper.get_position();
465✔
187
    return size_t(FindRes_column);
465✔
188
}
465✔
189

190

191
template <IndexMethod method>
192
int64_t IndexArray::index_string(Mixed value, InternalFindResult& result_ref, const ClusterColumn& column) const
193
{
1,124,538✔
194
    // Return`realm::not_found`, or an index to the (any) match
2,268,951✔
195
    constexpr bool first(method == index_FindFirst);
1,124,538✔
196
    // Return 0, or the number of items that match the specified `value`
2,268,951✔
197
    constexpr bool get_count(method == index_Count);
1,124,538✔
198
    // Same as `index_FindAll` but does not copy matching rows into `column`
2,268,951✔
199
    // returns FindRes_not_found if there are no matches
1,124,538✔
200
    // returns FindRes_single and the row index (literal) in result_ref.payload
1,124,538✔
201
    // or returns FindRes_column and the reference to a column of duplicates in
1,124,538✔
202
    // result_ref.result with the results in the bounds start_ndx, and end_ndx
1,124,538✔
203
    constexpr bool allnocopy(method == index_FindAll_nocopy);
1,124,538✔
204

2,268,951✔
205
    constexpr int64_t local_not_found = allnocopy ? int64_t(FindRes_not_found) : first ? null_key.value : 0;
1,124,538✔
206

2,268,951✔
207
    const char* data = m_data;
1,124,538✔
208
    const char* header;
2,268,951✔
209
    uint_least8_t width = m_width;
2,268,951✔
210
    bool is_inner_node = m_is_inner_bptree_node;
2,268,951✔
211
    typedef StringIndex::key_type key_type;
2,268,951✔
212
    size_t stringoffset = 0;
2,268,951✔
213

2,268,951✔
214
    StringConversionBuffer buffer;
1,124,538✔
215
    StringData index_data = value.get_index_data(buffer);
2,268,951✔
216

2,268,951✔
217
    // Create 4 byte index key
1,124,538✔
218
    key_type key = StringIndex::create_key(index_data, stringoffset);
1,124,538✔
219

2,268,951✔
220
    for (;;) {
1,377,210✔
221
        // Get subnode table
2,766,138✔
222
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
1,377,210✔
223

2,766,138✔
224
        // Find the position matching the key
1,377,210✔
225
        const char* offsets_header = m_alloc.translate(offsets_ref);
1,377,210✔
226
        const char* offsets_data = get_data_from_header(offsets_header);
2,766,138✔
227
        size_t offsets_size = get_size_from_header(offsets_header);
2,766,138✔
228
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
2,766,138✔
229

2,766,138✔
230
        // If key is outside range, we know there can be no match
1,377,210✔
231
        if (pos == offsets_size)
1,377,210✔
232
            return local_not_found;
1,838,634✔
233

1,382,349✔
234
        // Get entry under key
927,504✔
235
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
927,504✔
236
        uint64_t ref = get_direct(data, width, pos_refs);
1,861,587✔
237

1,861,587✔
238
        if (is_inner_node) {
927,504✔
239
            // Set vars for next iteration
960,735✔
240
            header = m_alloc.translate(to_ref(ref));
26,652✔
241
            data = get_data_from_header(header);
62,262✔
242
            width = get_width_from_header(header);
62,262✔
243
            is_inner_node = get_is_inner_bptree_node_from_header(header);
62,262✔
244
            continue;
62,262✔
245
        }
62,262✔
246

936,462✔
247
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
900,852✔
248

1,799,325✔
249
        if (stored_key != key)
900,852✔
250
            return local_not_found;
1,029,561✔
251

918,228✔
252
        // Literal row index (tagged)
769,764✔
253
        if (ref & 1) {
769,764✔
254
            int64_t key_value = int64_t(ref >> 1);
1,285,806✔
255

1,069,122✔
256
            Mixed a = column.is_fulltext() ? reconstruct_string(stringoffset, key, index_data)
535,797✔
257
                                           : column.get_value(ObjKey(key_value));
535,881✔
258
            if (a == value) {
1,069,038✔
259
                result_ref.payload = key_value;
1,067,919✔
260
                return first ? key_value : get_count ? 1 : FindRes_single;
1,066,530✔
261
            }
1,057,284✔
262
            return local_not_found;
533,139✔
263
        }
2,592✔
264

235,356✔
265
        const char* sub_header = m_alloc.translate(ref_type(ref));
233,967✔
266
        const bool sub_isindex = get_context_flag_from_header(sub_header);
450,651✔
267

450,651✔
268
        // List of row indices with common prefix up to this point, in sorted order.
233,967✔
269
        if (!sub_isindex) {
233,967✔
270
            const IntegerColumn sub(m_alloc, ref_type(ref));
224,694✔
271
            if (column.is_fulltext()) {
15,855✔
272
                result_ref.payload = ref;
7,989✔
273
                result_ref.start_ndx = 0;
288✔
274
                result_ref.end_ndx = sub.size();
288✔
275
                return FindRes_column;
288✔
276
            }
288✔
277

8,010✔
278
            return from_list<method>(value, result_ref, sub, column);
7,866✔
279
        }
15,567✔
280

233,658✔
281
        // Recurse into sub-index;
225,957✔
282
        header = sub_header;
225,957✔
283
        data = get_data_from_header(header);
434,796✔
284
        width = get_width_from_header(header);
434,796✔
285
        is_inner_node = get_is_inner_bptree_node_from_header(header);
434,796✔
286

434,796✔
287
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
225,957✔
288
        stringoffset += 4;
225,957✔
289

434,796✔
290
        // Update 4 byte index key
225,957✔
291
        key = StringIndex::create_key(index_data, stringoffset);
225,957✔
292
    }
434,796✔
293
}
1,333,377✔
294

1,144,413✔
295

296
void IndexArray::from_list_all_ins(StringData upper_value, std::vector<ObjKey>& result, const IntegerColumn& rows,
297
                                   const ClusterColumn& column) const
298
{
711✔
299
    // optimization for the most common case, where all the strings under a given subindex are equal
1,422✔
300
    Mixed first = column.get_value(ObjKey(*rows.cbegin()));
711✔
301
    Mixed last = column.get_value(ObjKey(*(rows.cend() - 1)));
1,422✔
302
    if (first == last) {
1,422✔
303
        if (!first.is_type(type_String))
762✔
304
            return;
51✔
305
        auto first_str_upper = case_map(first.get_string(), true);
51✔
306
        if (first_str_upper != upper_value) {
102✔
307
            return;
63✔
308
        }
24✔
309

51✔
310
        size_t sz = result.size() + rows.size();
39✔
311
        result.reserve(sz);
78✔
312
        for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
162✔
313
            result.push_back(ObjKey(*it));
207✔
314
        }
168✔
315
        return;
123✔
316
    }
78✔
317

699✔
318
    // special case for very long strings, where they might have a common prefix and end up in the
660✔
319
    // same subindex column, but still not be identical
660✔
320
    for (IntegerColumn::const_iterator it = rows.cbegin(); it != rows.cend(); ++it) {
33,396✔
321
        ObjKey key = ObjKey(*it);
66,132✔
322
        Mixed val = column.get_value(key);
65,472✔
323
        if (val.is_type(type_String)) {
65,472✔
324
            auto upper_str = case_map(val.get_string(), true);
65,472✔
325
            if (upper_str == upper_value) {
65,472✔
326
                result.push_back(key);
33,456✔
327
            }
1,440✔
328
        }
33,456✔
329
    }
65,472✔
330

33,396✔
331
    return;
660✔
332
}
1,320✔
333

660✔
334

335
void IndexArray::from_list_all(Mixed value, std::vector<ObjKey>& result, const IntegerColumn& rows,
336
                               const ClusterColumn& column) const
337
{
20,607✔
338
    SortedListComparator slc(column);
42,195✔
339

42,195✔
340
    IntegerColumn::const_iterator it_end = rows.cend();
20,607✔
341
    IntegerColumn::const_iterator lower = std::lower_bound(rows.cbegin(), it_end, value, slc);
42,195✔
342
    if (lower == it_end)
42,195✔
343
        return;
21,588✔
344

20,607✔
345
    ObjKey key = ObjKey(*lower);
20,607✔
346

42,195✔
347
    Mixed a = column.get_value(key);
20,607✔
348
    if (a != value)
42,195✔
349
        return;
21,588✔
350

20,607✔
351
    IntegerColumn::const_iterator upper = std::upper_bound(lower, it_end, value, slc);
20,607✔
352

42,195✔
353
    // Copy all matches into result column
20,607✔
354
    size_t sz = result.size() + (upper - lower);
20,607✔
355
    result.reserve(sz);
42,195✔
356
    for (IntegerColumn::const_iterator it = lower; it != upper; ++it) {
405,879✔
357
        result.push_back(ObjKey(*it));
766,713✔
358
    }
745,125✔
359

402,048✔
360
    return;
20,607✔
361
}
42,195✔
362

21,588✔
363

364
namespace {
365

366
// Helper functions for SearchList (index_string_all_ins) for generating permutations of index keys
367

368
// replicates the 4 least significant bits each times 8
369
// eg: abcd -> aaaaaaaabbbbbbbbccccccccdddddddd
370
uint32_t replicate_4_lsb_x8(uint32_t i)
371
{
1,561,629✔
372
    REALM_ASSERT_DEBUG(i <= 15);
3,123,501✔
373
    i *= 0x204081;
3,123,501✔
374
    i &= 0x1010101;
3,123,501✔
375
    i *= 0xff;
3,123,501✔
376
    return i;
3,123,501✔
377
}
3,123,501✔
378

1,561,872✔
379
int32_t select_from_mask(int32_t a, int32_t b, int32_t mask) {
1,561,629✔
380
    return a ^ ((a ^ b) & mask);
3,123,501✔
381
}
3,123,501✔
382

1,561,872✔
383
// Given upper and lower keys: "ABCD" and "abcd", the 4 LSBs in the permutation argument determine the
384
// final key:
385
// Permutation 0  = "ABCD"
386
// Permutation 1  = "ABCd"
387
// Permutation 8  = "aBCD"
388
// Permutation 15 = "abcd"
389
using key_type = StringIndex::key_type;
390
key_type generate_key(key_type upper, key_type lower, int permutation) {
1,561,626✔
391
    return select_from_mask(upper, lower, replicate_4_lsb_x8(permutation));
1,561,626✔
392
}
3,123,498✔
393

1,561,872✔
394

1,561,872✔
395
// Helper structure for IndexArray::index_string_all_ins to generate and keep track of search key permutations,
396
// when traversing the trees.
397
struct SearchList {
398
    struct Item {
399
        const char* header;
400
        size_t string_offset;
401
        key_type key;
402
    };
403

404
    SearchList(const util::Optional<std::string>& upper_value, const util::Optional<std::string>& lower_value)
405
        : m_upper_value(upper_value)
406
        , m_lower_value(lower_value)
407
    {
3,288✔
408
        m_keys_seen.reserve(num_permutations);
3,288✔
409
    }
6,579✔
410

3,291✔
411
    // Add all unique keys for this level to the internal work stack
3,291✔
412
    void add_all_for_level(const char* header, size_t string_offset)
413
    {
97,608✔
414
        m_keys_seen.clear();
97,608✔
415
        const key_type upper_key = StringIndex::create_key(m_upper_value, string_offset);
195,225✔
416
        const key_type lower_key = StringIndex::create_key(m_lower_value, string_offset);
195,225✔
417
        for (int p = 0; p < num_permutations; ++p) {
1,756,857✔
418
            // FIXME: This might still be incorrect due to multi-byte unicode characters (crossing the 4 byte key
1,659,249✔
419
            // size) being combined incorrectly.
3,221,121✔
420
            const key_type key = generate_key(upper_key, lower_key, p);
1,561,632✔
421
            const bool new_key = std::find(m_keys_seen.cbegin(), m_keys_seen.cend(), key) == m_keys_seen.cend();
1,561,632✔
422
            if (new_key) {
3,123,504✔
423
                m_keys_seen.push_back(key);
3,091,416✔
424
                add_next(header, string_offset, key);
3,091,416✔
425
            }
3,059,397✔
426
        }
3,091,485✔
427
    }
1,627,461✔
428

1,561,872✔
429
    bool empty() const
97,617✔
430
    {
1,532,031✔
431
        return m_items.empty();
1,532,031✔
432
    }
3,065,175✔
433

1,533,144✔
434
    Item get_next()
1,533,144✔
435
    {
1,529,193✔
436
        Item item = m_items.back();
1,529,193✔
437
        m_items.pop_back();
3,059,046✔
438
        return item;
3,059,046✔
439
    }
3,059,046✔
440

1,529,853✔
441
    // Add a single entry to the internal work stack. Used to traverse the inner trees (same key)
1,529,853✔
442
    void add_next(const char* header, size_t string_offset, key_type key)
443
    {
1,529,424✔
444
        m_items.push_back({header, string_offset, key});
1,529,424✔
445
    }
3,059,277✔
446

1,529,853✔
447
private:
1,529,853✔
448
    static constexpr int num_permutations = 1 << sizeof(key_type); // 4 bytes gives up to 16 search keys
449

450
    std::vector<Item> m_items;
451

452
    const util::Optional<std::string> m_upper_value;
453
    const util::Optional<std::string> m_lower_value;
454

455
    std::vector<key_type> m_keys_seen;
456
};
457

458

459
} // namespace
460

461

462
void IndexArray::index_string_all_ins(StringData value, std::vector<ObjKey>& result,
463
                                      const ClusterColumn& column) const
464
{
3,288✔
465
    REALM_ASSERT(!value.is_null());
3,288✔
466

6,579✔
467
    const util::Optional<std::string> upper_value = case_map(value, true);
6,579✔
468
    const util::Optional<std::string> lower_value = case_map(value, false);
3,288✔
469
    SearchList search_list(upper_value, lower_value);
6,579✔
470

6,579✔
471
    const char* top_header = get_header_from_data(m_data);
6,579✔
472
    search_list.add_all_for_level(top_header, 0);
3,288✔
473

6,579✔
474
    while (!search_list.empty()) {
1,535,805✔
475
        SearchList::Item item = search_list.get_next();
1,529,226✔
476

3,062,370✔
477
        const char* const header = item.header;
3,059,079✔
478
        const size_t string_offset = item.string_offset;
1,529,226✔
479
        const key_type key = item.key;
3,059,079✔
480
        const char* const data = get_data_from_header(header);
3,059,079✔
481
        const uint_least8_t width = get_width_from_header(header);
3,059,079✔
482
        const bool is_inner_node = get_is_inner_bptree_node_from_header(header);
3,059,079✔
483

3,059,079✔
484
        // Get subnode table
3,059,079✔
485
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
1,529,226✔
486

1,529,226✔
487
        // Find the position matching the key
3,059,079✔
488
        const char* const offsets_header = m_alloc.translate(offsets_ref);
1,529,226✔
489
        const char* const offsets_data = get_data_from_header(offsets_header);
1,529,226✔
490
        const size_t offsets_size = get_size_from_header(offsets_header);
3,059,079✔
491
        const size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
3,059,079✔
492

3,059,079✔
493
        // If key is outside range, we know there can be no match
3,059,079✔
494
        if (pos == offsets_size)
1,529,226✔
495
            continue;
405✔
496

3,058,674✔
497
        // Get entry under key
1,529,250✔
498
        const size_t pos_refs = pos + 1; // first entry in refs points to offsets
1,528,821✔
499
        const uint64_t ref = get_direct(data, width, pos_refs);
1,528,821✔
500

3,058,245✔
501
        if (is_inner_node) {
3,058,245✔
502
            // Set vars for next iteration
503
            const char* const inner_header = m_alloc.translate(to_ref(ref));
1,529,424✔
504
            search_list.add_next(inner_header, string_offset, key);
505
            continue;
×
506
        }
×
507

1,528,821✔
508
        const key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
1,528,821✔
509

1,528,821✔
510
        if (stored_key != key)
3,058,245✔
511
            continue;
1,430,490✔
512

1,627,755✔
513
        // Literal row index (tagged)
1,529,895✔
514
        if (ref & 1) {
98,331✔
515
            ObjKey k(int64_t(ref >> 1));
2,820✔
516

100,680✔
517
            // The buffer is needed when for when this is an integer index.
5,643✔
518
            StringConversionBuffer buffer;
2,820✔
519
            const StringData str = column.get_value(k).get_index_data(buffer);
2,820✔
520
            const util::Optional<std::string> upper_str = case_map(str, true);
5,643✔
521
            if (upper_str == upper_value) {
5,643✔
522
                result.push_back(k);
5,619✔
523
            }
5,619✔
524
            continue;
5,619✔
525
        }
5,619✔
526

98,334✔
527
        const char* const sub_header = m_alloc.translate(ref_type(ref));
98,334✔
528
        const bool sub_isindex = get_context_flag_from_header(sub_header);
95,511✔
529

190,548✔
530
        // List of row indices with common prefix up to this point, in sorted order.
190,548✔
531
        if (!sub_isindex) {
95,511✔
532
            const IntegerColumn sub(m_alloc, ref_type(ref));
711✔
533
            from_list_all_ins(upper_value, result, sub, column);
95,748✔
534
            continue;
1,422✔
535
        }
1,422✔
536

95,511✔
537
        // Recurse into sub-index;
95,511✔
538
        search_list.add_all_for_level(sub_header, string_offset + 4);
94,800✔
539
    }
94,800✔
540

97,614✔
541
    // sort the result and return a std::vector
97,614✔
542
    std::sort(result.begin(), result.end());
3,288✔
543
}
3,288✔
544

3,291✔
545

3,291✔
546
void IndexArray::index_string_all(Mixed value, std::vector<ObjKey>& result, const ClusterColumn& column) const
547
{
24,768✔
548
    const char* data = m_data;
24,768✔
549
    const char* header;
50,421✔
550
    uint_least8_t width = m_width;
50,421✔
551
    bool is_inner_node = m_is_inner_bptree_node;
50,421✔
552
    size_t stringoffset = 0;
50,421✔
553

50,421✔
554
    StringConversionBuffer buffer;
50,421✔
555
    StringData index_data = value.get_index_data(buffer);
24,768✔
556
    // Create 4 byte index key
50,421✔
557
    key_type key = StringIndex::create_key(index_data, stringoffset);
50,421✔
558

24,768✔
559
    for (;;) {
64,926✔
560
        // Get subnode table
39,273✔
561
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
79,020✔
562

39,273✔
563
        // Find the position matching the key
79,020✔
564
        const char* offsets_header = m_alloc.translate(offsets_ref);
39,273✔
565
        const char* offsets_data = get_data_from_header(offsets_header);
39,273✔
566
        size_t offsets_size = get_size_from_header(offsets_header);
79,020✔
567
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, key); // keys are always 32 bits wide
79,020✔
568

79,020✔
569
        // If key is outside range, we know there can be no match
79,020✔
570
        if (pos == offsets_size)
39,273✔
571
            return;
572

79,020✔
573
        // Get entry under key
39,273✔
574
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
39,273✔
575
        uint64_t ref = get_direct(data, width, pos_refs);
39,273✔
576

79,020✔
577
        if (is_inner_node) {
79,020✔
578
            // Set vars for next iteration
579
            header = m_alloc.translate(ref_type(ref));
39,747✔
580
            data = get_data_from_header(header);
581
            width = get_width_from_header(header);
×
582
            is_inner_node = get_is_inner_bptree_node_from_header(header);
×
583
            continue;
×
584
        }
×
585

39,273✔
586
        key_type stored_key = key_type(get_direct<32>(offsets_data, pos));
39,273✔
587

39,273✔
588
        if (stored_key != key)
79,020✔
589
            return;
3,060✔
590

75,960✔
591
        // Literal row index (tagged)
39,273✔
592
        if (ref & 1) {
36,213✔
593
            ObjKey k(int64_t(ref >> 1));
1,101✔
594

37,788✔
595
            Mixed a = column.get_value(k);
2,103✔
596
            if (a == value) {
1,101✔
597
                result.push_back(k);
2,103✔
598
                return;
2,103✔
599
            }
2,103✔
600
            return;
1,002✔
601
        }
1,002✔
602

35,112✔
603
        const char* sub_header = m_alloc.translate(ref_type(ref));
35,112✔
604
        const bool sub_isindex = get_context_flag_from_header(sub_header);
35,112✔
605

70,797✔
606
        // List of row indices with common prefix up to this point, in sorted order.
70,797✔
607
        if (!sub_isindex) {
35,112✔
608
            const IntegerColumn sub(m_alloc, ref_type(ref));
20,607✔
609
            return from_list_all(value, result, sub, column);
56,292✔
610
        }
42,198✔
611

36,096✔
612
        // Recurse into sub-index;
36,096✔
613
        header = sub_header;
14,505✔
614
        data = get_data_from_header(header);
14,505✔
615
        width = get_width_from_header(header);
28,599✔
616
        is_inner_node = get_is_inner_bptree_node_from_header(header);
28,599✔
617

28,599✔
618
        // Go to next key part of the string. If the offset exceeds the string length, the key will be 0
28,599✔
619
        stringoffset += 4;
14,505✔
620

14,505✔
621
        // Update 4 byte index key
28,599✔
622
        key = StringIndex::create_key(index_data, stringoffset);
14,505✔
623
    }
14,505✔
624
}
38,862✔
625

14,094✔
626
static void get_all_keys_below(std::set<int64_t>& result, ref_type ref, Allocator& alloc)
25,653✔
627
{
36✔
628
    const char* sub_header = alloc.translate(ref_type(ref));
36✔
629
    const bool sub_isindex = NodeHeader::get_context_flag_from_header(sub_header);
72✔
630

72✔
631
    if (sub_isindex) {
72✔
632
        Array tree(alloc);
18✔
633
        tree.init_from_ref(ref);
54✔
634
        auto sz = tree.size();
36✔
635
        for (size_t n = 1; n < sz; n++) {
63✔
636
            auto rot = tree.get_as_ref_or_tagged(n);
45✔
637
            // Literal row index (tagged)
72✔
638
            if (rot.is_tagged()) {
54✔
639
                result.insert(rot.get_as_int());
15✔
640
            }
42✔
641
            else {
27✔
642
                get_all_keys_below(result, rot.get_as_ref(), alloc);
27✔
643
            }
24✔
644
        }
39✔
645
    }
30✔
646
    else {
45✔
647
        IntegerColumn tree(alloc, ref);
36✔
648
        tree.for_all([&result](int64_t i) {
60✔
649
            result.insert(i);
60✔
650
        });
84✔
651
    }
60✔
652
}
78✔
653

18✔
654
void IndexArray::_index_string_find_all_prefix(std::set<int64_t>& result, StringData str, const char* header) const
36✔
655
{
24✔
656
    size_t stringoffset = 0;
24✔
657

48✔
658
    for (;;) {
60✔
659
        const char* data = NodeHeader::get_data_from_header(header);
36✔
660
        uint_least8_t width = get_width_from_header(header);
72✔
661

72✔
662
        // Create 4 byte lower and upper key
72✔
663
        key_type lower = 0;
36✔
664
        size_t i = 0;
36✔
665
        size_t n = str.size() - stringoffset;
72✔
666
        bool is_at_string_end = (n <= 4);
72✔
667
        if (!is_at_string_end) {
72✔
668
            n = 4;
48✔
669
        }
48✔
670
        while (i < n) {
153✔
671
            lower <<= 8;
117✔
672
            lower += str[stringoffset + i++];
246✔
673
        }
210✔
674
        size_t shift = (4 - i) * 8;
141✔
675
        key_type upper = lower + 1;
141✔
676
        lower <<= shift;
72✔
677
        upper <<= shift;
72✔
678
        upper--;
72✔
679

72✔
680
        // Get index array
72✔
681
        ref_type offsets_ref = to_ref(get_direct(data, width, 0));
36✔
682

36✔
683
        // Find the position matching the key
72✔
684
        const char* offsets_header = m_alloc.translate(offsets_ref);
36✔
685
        const char* offsets_data = get_data_from_header(offsets_header);
36✔
686
        size_t offsets_size = get_size_from_header(offsets_header);
72✔
687
        size_t pos = ::lower_bound<32>(offsets_data, offsets_size, lower); // keys are always 32 bits wide
72✔
688
        // If key is outside range, we know there can be no match
72✔
689
        if (pos == offsets_size)
72✔
690
            return;
691

72✔
692
        size_t pos_refs = pos + 1; // first entry in refs points to offsets
36✔
693

36✔
694
        if (NodeHeader::get_is_inner_bptree_node_from_header(header)) {
72✔
695
            bool done = false;
696
            while (!done) {
36✔
697
                // Recursively call with child node
×
698
                const char* header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs++)));
×
699
                _index_string_find_all_prefix(result, str, header);
700

×
701
                // Check if current node is past end of key range or last node
×
702
                auto key = get_direct<32>(offsets_data, pos++);
703
                done = key > upper || pos == offsets_size;
704
            }
×
705
            return;
×
706
        }
×
707

36✔
708
        size_t end = ::upper_bound<32>(offsets_data, offsets_size, upper); // keys are always 32 bits wide
36✔
709
        if (is_at_string_end) {
36✔
710
            // now get all entries from start to end
60✔
711
            for (size_t ndx = pos_refs; ndx <= end; ndx++) {
90✔
712
                uint64_t ref = get_direct(data, width, ndx);
30✔
713
                // Literal row index (tagged)
84✔
714
                if (ref & 1) {
60✔
715
                    result.emplace(int64_t(ref >> 1));
6✔
716
                }
36✔
717
                else {
30✔
718
                    get_all_keys_below(result, to_ref(ref), m_alloc);
30✔
719
                }
48✔
720
            }
54✔
721
            return;
48✔
722
        }
54✔
723

36✔
724
        // When we are not at end of string then we are comparing against the whole key
36✔
725
        // and we can have at most one match
12✔
726
        REALM_ASSERT(end == pos + 1);
12✔
727
        header = m_alloc.translate(to_ref(get_direct(data, width, pos_refs)));
12✔
728
        stringoffset += 4;
24✔
729
    }
24✔
730
}
36✔
731

12✔
732
ObjKey IndexArray::index_string_find_first(Mixed value, const ClusterColumn& column) const
24✔
733
{
788,814✔
734
    InternalFindResult unused;
788,814✔
735
    return ObjKey(index_string<index_FindFirst>(value, unused, column));
1,597,461✔
736
}
1,597,461✔
737

808,647✔
738

808,647✔
739
void IndexArray::index_string_find_all(std::vector<ObjKey>& result, Mixed value, const ClusterColumn& column,
740
                                       bool case_insensitive) const
741
{
28,056✔
742
    if (case_insensitive && value.is_type(type_String)) {
28,056✔
743
        index_string_all_ins(value.get_string(), result, column);
32,232✔
744
    }
32,232✔
745
    else {
28,059✔
746
        index_string_all(value, result, column);
28,059✔
747
    }
50,421✔
748
}
53,709✔
749

25,653✔
750
FindRes IndexArray::index_string_find_all_no_copy(Mixed value, const ClusterColumn& column,
28,944✔
751
                                                  InternalFindResult& result) const
752
{
329,298✔
753
    return static_cast<FindRes>(index_string<index_FindAll_nocopy>(value, result, column));
329,298✔
754
}
658,599✔
755

329,301✔
756
size_t IndexArray::index_string_count(Mixed value, const ClusterColumn& column) const
329,301✔
757
{
6,432✔
758
    InternalFindResult unused;
6,432✔
759
    return to_size_t(index_string<index_Count>(value, unused, column));
12,864✔
760
}
12,864✔
761

6,432✔
762
std::unique_ptr<IndexArray> StringIndex::create_node(Allocator& alloc, bool is_leaf)
6,432✔
763
{
87,624✔
764
    Array::Type type = is_leaf ? Array::type_HasRefs : Array::type_InnerBptreeNode;
87,624✔
765
    std::unique_ptr<IndexArray> top(new IndexArray(alloc)); // Throws
177,777✔
766
    top->create(type);                                      // Throws
177,738✔
767

177,777✔
768
    // Mark that this is part of index
177,777✔
769
    // (as opposed to columns under leaves)
87,624✔
770
    top->set_context_flag(true);
87,624✔
771

87,624✔
772
    // Add subcolumns for leaves
177,777✔
773
    Array values(alloc);
87,624✔
774
    values.create(Array::type_Normal);       // Throws
87,624✔
775
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
177,777✔
776
    top->add(values.get_ref());              // first entry in refs points to offsets
177,777✔
777

177,777✔
778
    return top;
177,777✔
779
}
87,624✔
780

90,153✔
781
void StringIndex::set_target(const ClusterColumn& target_column) noexcept
90,153✔
782
{
783
    m_target_column = target_column;
784
}
64,476✔
785

64,476✔
786

64,476✔
787
StringIndex::key_type StringIndex::get_last_key() const
64,476✔
788
{
88,806✔
789
    Array offsets(m_array->get_alloc());
24,330✔
790
    get_child(*m_array, 0, offsets);
24,330✔
791
    return key_type(offsets.back());
24,330✔
792
}
4,664,661✔
793

794

4,640,331✔
795
void StringIndex::insert_with_offset(ObjKey obj_key, StringData index_data, const Mixed& value, size_t offset)
4,640,331✔
796
{
9,261,042✔
797
    // Create 4 byte index key
4,620,711✔
798
    key_type key = create_key(index_data, offset);
4,620,711✔
799
    TreeInsert(obj_key, key, offset, index_data, value); // Throws
4,620,711✔
800
}
5,657,205✔
801

1,036,494✔
802
void StringIndex::insert_to_existing_list_at_lower(ObjKey key, Mixed value, IntegerColumn& list,
803
                                                   const IntegerColumnIterator& lower)
804
{
1,038,306✔
805
    SortedListComparator slc(m_target_column);
2,074,800✔
806
    // At this point there exists duplicates of this value, we need to
1,038,306✔
807
    // insert value beside it's duplicates so that rows are also sorted
1,038,306✔
808
    // in ascending order.
1,038,306✔
809
    IntegerColumn::const_iterator upper = std::upper_bound(lower, list.cend(), value, slc);
2,074,800✔
810
    // find insert position (the list has to be kept in sorted order)
2,074,800✔
811
    // In most cases the refs will be added to the end. So we test for that
2,074,800✔
812
    // first to see if we can avoid the binary search for insert position
2,056,896✔
813
    IntegerColumn::const_iterator last = upper - ptrdiff_t(1);
2,056,896✔
814
    int64_t last_key_value = *last;
1,056,210✔
815
    if (key.value >= last_key_value) {
1,056,210✔
816
        list.insert(upper.get_position(), key.value);
1,038,762✔
817
    }
1,038,762✔
818
    else {
1,053,942✔
819
        IntegerColumn::const_iterator inner_lower = std::lower_bound(lower, upper, key.value);
17,448✔
820
        list.insert(inner_lower.get_position(), key.value);
17,448✔
821
    }
18,318✔
822
}
1,039,176✔
823

870✔
824
void StringIndex::insert_to_existing_list(ObjKey key, Mixed value, IntegerColumn& list)
870✔
825
{
993✔
826
    SortedListComparator slc(m_target_column);
1,863✔
827
    IntegerColumn::const_iterator it_end = list.cend();
993✔
828
    IntegerColumn::const_iterator lower = std::lower_bound(list.cbegin(), it_end, value, slc);
1,710✔
829

1,710✔
830
    if (lower == it_end) {
1,146✔
831
        // Not found and everything is less, just append it to the end.
903✔
832
        list.add(key.value);
903✔
833
    }
750✔
834
    else {
396✔
835
        ObjKey lower_key = ObjKey(*lower);
396✔
836
        Mixed lower_value = get(lower_key);
396✔
837

243✔
838
        if (lower_value != value) {
243✔
839
            list.insert(lower.get_position(), key.value);
243✔
840
        }
243✔
841
        else {
×
842
            // At this point there exists duplicates of this value, we need to
×
843
            // insert value beside it's duplicates so that rows are also sorted
153✔
844
            // in ascending order.
870✔
845
            insert_to_existing_list_at_lower(key, value, list, lower);
846
        }
847
    }
243✔
848
}
2,637✔
849

1,644✔
850

851
void StringIndex::insert_row_list(size_t ref, size_t offset, StringData index_data)
852
{
2,388✔
853
    REALM_ASSERT(!m_array->is_inner_bptree_node()); // only works in leaves
744✔
854

744✔
855
    // Create 4 byte index key
2,388✔
856
    key_type key = create_key(index_data, offset);
2,388✔
857

2,388✔
858
    // Get subnode table
2,388✔
859
    Allocator& alloc = m_array->get_alloc();
744✔
860
    Array values(alloc);
2,388✔
861
    get_child(*m_array, 0, values);
2,388✔
862
    REALM_ASSERT(m_array->size() == values.size() + 1);
744✔
863

2,388✔
864
    size_t ins_pos = values.lower_bound_int(key);
2,388✔
865
    if (ins_pos == values.size()) {
2,388✔
866
        // When key is outside current range, we can just add it
2,388✔
867
        values.add(key);
744✔
868
        m_array->add(ref);
744✔
869
        return;
744✔
870
    }
744✔
871

×
872
#ifdef REALM_DEBUG // LCOV_EXCL_START ignore debug code
×
873
    // Since we only use this for moving existing values to new
×
874
    // subindexes, there should never be an existing match.
875
    key_type k = key_type(values.get(ins_pos));
876
    REALM_ASSERT(k != key);
×
877
#endif // LCOV_EXCL_STOP ignore debug code
×
878

×
879
    // If key is not present we add it at the correct location
880
    values.insert(ins_pos, key);
881
    m_array->insert(ins_pos + 1, ref);
882
}
4,640,190✔
883

4,640,190✔
884

4,640,190✔
885
void StringIndex::TreeInsert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value)
4,639,983✔
886
{
9,257,898✔
887
    NodeChange nc = do_insert(obj_key, key, offset, index_data, value);
4,617,915✔
888
    switch (nc.type) {
4,617,915✔
889
        case NodeChange::change_None:
4,622,667✔
890
            return;
4,622,667✔
891
        case NodeChange::change_InsertBefore: {
×
892
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
×
893
            new_node.node_add_key(nc.ref1);
×
894
            new_node.node_add_key(get_ref());
×
895
            m_array->init_from_ref(new_node.get_ref());
3✔
896
            m_array->update_parent();
3✔
897
            return;
3✔
898
        }
3✔
899
        case NodeChange::change_InsertAfter: {
6✔
900
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
6✔
901
            new_node.node_add_key(get_ref());
6✔
902
            new_node.node_add_key(nc.ref1);
3✔
903
            m_array->init_from_ref(new_node.get_ref());
39✔
904
            m_array->update_parent();
39✔
905
            return;
39✔
906
        }
36✔
907
        case NodeChange::change_Split: {
63✔
908
            StringIndex new_node(inner_node_tag(), m_array->get_alloc());
63✔
909
            new_node.node_add_key(nc.ref1);
63✔
910
            new_node.node_add_key(nc.ref2);
27✔
911
            m_array->init_from_ref(new_node.get_ref());
27✔
912
            m_array->update_parent();
27✔
913
            return;
27✔
914
        }
915
    }
916
    REALM_ASSERT(false); // LCOV_EXCL_LINE; internal Realm error
917
}
918

4,672,251✔
919

4,672,251✔
920
StringIndex::NodeChange StringIndex::do_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data,
4,672,251✔
921
                                               const Mixed& value)
922
{
4,675,824✔
923
    Allocator& alloc = m_array->get_alloc();
4,675,824✔
924
    if (m_array->is_inner_bptree_node()) {
4,675,824✔
925
        // Get subnode table
21,882✔
926
        Array keys(alloc);
21,882✔
927
        get_child(*m_array, 0, keys);
53,892✔
928
        REALM_ASSERT(m_array->size() == keys.size() + 1);
53,892✔
929

21,882✔
930
        // Find the subnode containing the item
31,224✔
931
        size_t node_ndx = keys.lower_bound_int(key);
31,224✔
932
        if (node_ndx == keys.size()) {
21,882✔
933
            // node can never be empty, so try to fit in last item
8,337✔
934
            node_ndx = keys.size() - 1;
40,347✔
935
        }
40,347✔
936

53,892✔
937
        // Get sublist
21,882✔
938
        size_t refs_ndx = node_ndx + 1; // first entry in refs points to offsets
21,882✔
939
        ref_type ref = m_array->get_as_ref(refs_ndx);
53,892✔
940
        StringIndex target(ref, m_array.get(), refs_ndx, m_target_column, alloc);
53,892✔
941

21,882✔
942
        // Insert item
53,868✔
943
        NodeChange nc = target.do_insert(obj_key, key, offset, index_data, value);
53,868✔
944
        if (nc.type == NodeChange::change_None) {
53,868✔
945
            // update keys
53,856✔
946
            key_type last_key = target.get_last_key();
21,870✔
947
            keys.set(node_ndx, last_key);
21,894✔
948
            return NodeChange::change_None; // no new nodes
21,879✔
949
        }
21,879✔
950

21✔
951
        if (nc.type == NodeChange::change_InsertAfter) {
12✔
952
            ++node_ndx;
6✔
953
            ++refs_ndx;
30✔
954
        }
30✔
955

27✔
956
        // If there is room, just update node directly
27✔
957
        if (keys.size() < REALM_MAX_BPNODE_SIZE) {
21✔
958
            if (nc.type == NodeChange::change_Split) {
21✔
959
                node_insert_split(node_ndx, nc.ref2);
15✔
960
            }
30✔
961
            else {
30✔
962
                node_insert(node_ndx, nc.ref1); // ::INSERT_BEFORE/AFTER
6✔
963
            }
6✔
964
            return NodeChange::change_None;
12✔
965
        }
12!
966

967
        // Else create new node
×
968
        StringIndex new_node(inner_node_tag(), alloc);
×
969
        if (nc.type == NodeChange::change_Split) {
970
            // update offset for left node
×
971
            key_type last_key = target.get_last_key();
×
972
            keys.set(node_ndx, last_key);
×
973

×
974
            new_node.node_add_key(nc.ref2);
×
975
            ++node_ndx;
×
976
            ++refs_ndx;
×
977
        }
978
        else {
×
979
            new_node.node_add_key(nc.ref1);
×
980
        }
×
981

×
982
        switch (node_ndx) {
×
983
            case 0: // insert before
×
984
                return NodeChange(NodeChange::change_InsertBefore, new_node.get_ref());
×
985
            case REALM_MAX_BPNODE_SIZE: // insert after
×
986
                if (nc.type == NodeChange::change_Split)
987
                    return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
×
988
                return NodeChange(NodeChange::change_InsertAfter, new_node.get_ref());
×
989
            default: // split
×
990
                // Move items after split to new node
×
991
                size_t len = m_array->size();
×
992
                for (size_t i = refs_ndx; i < len; ++i) {
×
993
                    ref_type ref_i = m_array->get_as_ref(i);
×
994
                    new_node.node_add_key(ref_i);
×
995
                }
4,640,241✔
996
                keys.truncate(node_ndx);
4,640,241✔
997
                m_array->truncate(refs_ndx);
4,640,241✔
998
                return NodeChange(NodeChange::change_Split, get_ref(), new_node.get_ref());
999
        }
9,262,173✔
1000
    }
9,262,173✔
1001
    else {
9,262,173✔
1002
        // Is there room in the list?
9,262,173✔
1003
        Array old_keys(alloc);
4,621,932✔
1004
        get_child(*m_array, 0, old_keys);
9,262,173✔
1005
        const size_t old_offsets_size = old_keys.size();
4,621,932✔
1006
        REALM_ASSERT_EX(m_array->size() == old_offsets_size + 1, m_array->size(), old_offsets_size + 1);
4,621,932✔
1007

4,621,932✔
1008
        bool noextend = old_offsets_size >= REALM_MAX_BPNODE_SIZE;
9,262,173✔
1009

9,261,831✔
1010
        // See if we can fit entry into current leaf
4,621,932✔
1011
        // Works if there is room or it can join existing entries
4,621,932✔
1012
        if (leaf_insert(obj_key, key, offset, index_data, value, noextend))
4,622,274✔
1013
            return NodeChange::change_None;
4,623,423✔
1014

2,147,483,989✔
1015
        // Create new list for item (a leaf)
2,147,483,647✔
1016
        StringIndex new_list(m_target_column, alloc);
2,147,483,989✔
1017

2,147,483,647✔
1018
        new_list.leaf_insert(obj_key, key, offset, index_data, value);
2,147,483,647✔
1019

2,147,483,989✔
1020
        size_t ndx = old_keys.lower_bound_int(key);
2,147,483,647✔
1021

2,147,483,647✔
1022
        // insert before
2,147,483,647✔
1023
        if (ndx == 0)
2,147,483,989✔
1024
            return NodeChange(NodeChange::change_InsertBefore, new_list.get_ref());
12✔
1025

2,147,483,647✔
1026
        // insert after
2,147,483,647✔
1027
        if (ndx == old_offsets_size)
2,147,483,977✔
1028
            return NodeChange(NodeChange::change_InsertAfter, new_list.get_ref());
339✔
1029

2,147,483,647✔
1030
        // split
2,147,507,362✔
1031
        Array new_keys(alloc);
2,147,507,032✔
1032
        get_child(*new_list.m_array, 0, new_keys);
2,147,507,032✔
1033
        // Move items after split to new list
2,147,483,647✔
1034
        for (size_t i = ndx; i < old_offsets_size; ++i) {
2,147,507,032✔
1035
            int64_t v2 = old_keys.get(i);
39,747✔
1036
            int64_t v3 = m_array->get(i + 1);
39,747✔
1037

16,692✔
1038
            new_keys.add(v2);
16,692✔
1039
            new_list.m_array->add(v3);
16,362✔
1040
        }
16,692✔
1041
        old_keys.truncate(ndx);
2,147,483,977✔
1042
        m_array->truncate(ndx + 1);
2,152,155,898✔
1043

2,147,483,647✔
1044
        return NodeChange(NodeChange::change_Split, get_ref(), new_list.get_ref());
2,147,483,647✔
1045
    }
2,147,483,647✔
1046
}
4,643,829✔
1047

15✔
1048

15✔
1049
void StringIndex::node_insert_split(size_t ndx, size_t new_ref)
1050
{
21✔
1051
    REALM_ASSERT(m_array->is_inner_bptree_node());
21✔
1052
    REALM_ASSERT(new_ref);
21✔
1053

6✔
1054
    Allocator& alloc = m_array->get_alloc();
21✔
1055
    Array offsets(alloc);
21✔
1056
    get_child(*m_array, 0, offsets);
21✔
1057

6✔
1058
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
6✔
1059
    REALM_ASSERT(ndx < offsets.size());
21✔
1060
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
21✔
1061

21✔
1062
    // Get sublists
21✔
1063
    size_t refs_ndx = ndx + 1; // first entry in refs points to offsets
6✔
1064
    ref_type orig_ref = m_array->get_as_ref(refs_ndx);
6✔
1065
    StringIndex orig_col(orig_ref, m_array.get(), refs_ndx, m_target_column, alloc);
21✔
1066
    StringIndex new_col(new_ref, nullptr, 0, m_target_column, alloc);
21✔
1067

6✔
1068
    // Update original key
6✔
1069
    key_type last_key = orig_col.get_last_key();
21✔
1070
    offsets.set(ndx, last_key);
21✔
1071

21✔
1072
    // Insert new ref
21✔
1073
    key_type new_key = new_col.get_last_key();
6✔
1074
    offsets.insert(ndx + 1, new_key);
6✔
1075
    m_array->insert(ndx + 2, new_ref);
6✔
1076
}
15✔
1077

9✔
1078

9✔
1079
void StringIndex::node_insert(size_t ndx, size_t ref)
1080
{
15✔
1081
    REALM_ASSERT(ref);
15✔
1082
    REALM_ASSERT(m_array->is_inner_bptree_node());
15✔
1083

15✔
1084
    Allocator& alloc = m_array->get_alloc();
6✔
1085
    Array offsets(alloc);
15✔
1086
    get_child(*m_array, 0, offsets);
15✔
1087
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
6✔
1088

15✔
1089
    REALM_ASSERT(ndx <= offsets.size());
15✔
1090
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE);
6✔
1091

15✔
1092
    StringIndex col(ref, nullptr, 0, m_target_column, alloc);
15✔
1093
    key_type last_key = col.get_last_key();
15✔
1094

6✔
1095
    offsets.insert(ndx, last_key);
6✔
1096
    m_array->insert(ndx + 1, ref);
6✔
1097
}
4,638,894✔
1098

4,638,888✔
1099
bool StringIndex::leaf_insert(ObjKey obj_key, key_type key, size_t offset, StringData index_data, const Mixed& value,
1100
                              bool noextend)
1101
{
9,255,684✔
1102
    REALM_ASSERT(!m_array->is_inner_bptree_node());
9,255,684✔
1103

9,255,684✔
1104
    // Get subnode table
9,255,684✔
1105
    Allocator& alloc = m_array->get_alloc();
4,616,796✔
1106
    Array keys(alloc);
4,616,796✔
1107
    get_child(*m_array, 0, keys);
4,616,796✔
1108
    REALM_ASSERT(m_array->size() == keys.size() + 1);
9,255,684✔
1109

4,616,796✔
1110
    // If we are keeping the complete string in the index
9,255,684✔
1111
    // we want to know if this is the last part
9,255,684✔
1112
    bool is_at_string_end = offset + 4 >= index_data.size();
4,616,796✔
1113

9,255,684✔
1114
    size_t ins_pos = keys.lower_bound_int(key);
4,786,998✔
1115
    size_t ins_pos_refs = ins_pos + 1; // first entry in refs points to offsets
4,616,808✔
1116

4,616,796✔
1117
    if (ins_pos == keys.size()) {
4,616,796✔
1118
        if (noextend)
334,947✔
1119
            return false;
170,199✔
1120

334,536✔
1121
        // When key is outside current range, we can just add it
334,536✔
1122
        keys.add(key);
334,536✔
1123
        if (!m_target_column.is_fulltext() || is_at_string_end) {
165,150✔
1124
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
164,352✔
1125
            m_array->add(shifted);
164,754✔
1126
        }
164,754✔
1127
        else {
798✔
1128
            // create subindex for rest of string
798✔
1129
            StringIndex subindex(m_target_column, m_array->get_alloc());
170,586✔
1130
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
170,586✔
1131
            m_array->add(subindex.get_ref());
396✔
1132
        }
4,469,082✔
1133
        return true;
164,748✔
1134
    }
164,748✔
1135

8,920,725✔
1136
    key_type k = key_type(keys.get(ins_pos));
4,818,330✔
1137

4,452,090✔
1138
    // If key is not present we add it at the correct location
4,452,039✔
1139
    if (k != key) {
4,818,279✔
1140
        if (noextend)
713,751✔
1141
            return false;
365,421✔
1142

712,866✔
1143
        keys.insert(ins_pos, key);
712,866✔
1144
        if (!m_target_column.is_fulltext() || is_at_string_end) {
348,330✔
1145
            int64_t shifted = int64_t((uint64_t(obj_key.value) << 1) + 1); // shift to indicate literal
346,569✔
1146
            m_array->insert(ins_pos_refs, shifted);
347,421✔
1147
        }
347,421✔
1148
        else {
1,761✔
1149
            // create subindex for rest of string
1,761✔
1150
            StringIndex subindex(m_target_column, m_array->get_alloc());
367,149✔
1151
            subindex.insert_with_offset(obj_key, index_data, value, offset + 4);
367,149✔
1152
            m_array->insert(ins_pos_refs, subindex.get_ref());
909✔
1153
        }
909✔
1154
        return true;
347,478✔
1155
    }
4,449,873✔
1156

8,206,923✔
1157
    // This leaf already has a slot for for the key
4,104,528✔
1158

4,104,528✔
1159
    uint64_t slot_value = uint64_t(m_array->get(ins_pos_refs));
8,206,923✔
1160
    size_t suboffset = offset + s_index_key_length;
4,129,887✔
1161

4,129,605✔
1162
    // Single match (lowest bit set indicates literal row_ndx)
4,129,887✔
1163
    if ((slot_value & 1) != 0) {
4,104,528✔
1164
        ObjKey obj_key2 = ObjKey(int64_t(slot_value >> 1));
25,089✔
1165
        Mixed v2 = m_target_column.is_fulltext() ? reconstruct_string(offset, key, index_data) : get(obj_key2);
25,089✔
1166
        if (v2 == value) {
25,089✔
1167
            // Strings are equal but this is not a list.
9,498✔
1168
            // Create a list and add both rows.
9,498✔
1169

8,925✔
1170
            // convert to list (in sorted order)
8,925✔
1171
            Array row_list(alloc);
9,498✔
1172
            row_list.create(Array::type_Normal); // Throws
9,498✔
1173
            row_list.add(obj_key < obj_key2 ? obj_key.value : obj_key2.value);
25,461✔
1174
            row_list.add(obj_key < obj_key2 ? obj_key2.value : obj_key.value);
25,461✔
1175
            m_array->set(ins_pos_refs, row_list.get_ref());
25,461✔
1176
        }
25,461✔
1177
        else {
20,289✔
1178
            StringConversionBuffer buffer;
20,289✔
1179
            auto index_data_2 = v2.get_index_data(buffer);
20,532✔
1180
            if (index_data == index_data_2 || suboffset > s_max_offset) {
20,532✔
1181
                // These strings have the same prefix up to this point but we
522✔
1182
                // don't want to recurse further, create a list in sorted order.
411✔
1183
                bool row_ndx_first = value.compare_signed(v2) < 0;
411✔
1184
                Array row_list(alloc);
522✔
1185
                row_list.create(Array::type_Normal); // Throws
522✔
1186
                row_list.add(row_ndx_first ? obj_key.value : obj_key2.value);
20,697✔
1187
                row_list.add(row_ndx_first ? obj_key2.value : obj_key.value);
279✔
1188
                m_array->set(ins_pos_refs, row_list.get_ref());
279✔
1189
            }
279✔
1190
            else {
40,428✔
1191
                // These strings have the same prefix up to this point but they
40,428✔
1192
                // are actually not equal. Extend the tree recursivly until the
40,428✔
1193
                // prefix of these strings is different.
20,010✔
1194
                StringIndex subindex(m_target_column, m_array->get_alloc());
40,428✔
1195
                subindex.insert_with_offset(obj_key2, index_data_2, v2, suboffset);
40,428✔
1196
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
40,671✔
1197
                // Join the string of SubIndices to the current position of m_array
45,369✔
1198
                m_array->set(ins_pos_refs, subindex.get_ref());
45,369✔
1199
            }
20,010✔
1200
        }
20,289✔
1201
        return true;
25,089✔
1202
    }
4,102,125✔
1203

8,156,475✔
1204
    // If there already is a list of matches, we see if we fit there
8,156,475✔
1205
    // or it has to be split into a subindex
5,118,357✔
1206
    ref_type ref = ref_type(slot_value);
5,118,357✔
1207
    char* header = alloc.translate(ref);
4,079,439✔
1208
    if (!Array::get_context_flag_from_header(header)) {
5,118,357✔
1209
        IntegerColumn sub(alloc, ref); // Throws
2,078,667✔
1210
        sub.set_parent(m_array.get(), ins_pos_refs);
1,039,749✔
1211

2,078,691✔
1212
        IntegerColumn::const_iterator it_end = sub.cend();
2,078,691✔
1213
        IntegerColumn::const_iterator lower = it_end;
1,039,923✔
1214

1,039,923✔
1215
        auto value_exists_in_list = [&]() {
2,078,901✔
1216
            if (m_target_column.is_fulltext()) {
2,078,901✔
1217
                return reconstruct_string(offset, key, index_data) == value.get_string();
174✔
1218
            }
1,038,942✔
1219
            SortedListComparator slc(m_target_column);
2,076,885✔
1220
            lower = std::lower_bound(sub.cbegin(), it_end, value, slc);
2,076,885✔
1221

2,076,288✔
1222
            if (lower != it_end) {
2,076,288✔
1223
                Mixed lower_value = get(ObjKey(*lower));
1,040,970✔
1224
                if (lower_value == value) {
1,040,970✔
1225
                    return true;
1,040,619✔
1226
                }
1,038,180✔
1227
            }
1,779✔
1228
            return false;
1,040,697✔
1229
        };
1,038,282✔
1230

2,076,252✔
1231
        // If we found the value in this list, add the duplicate to the list.
1,042,164✔
1232
        if (value_exists_in_list()) {
1,039,749✔
1233
            insert_to_existing_list_at_lower(obj_key, value, sub, lower);
1,038,330✔
1234
        }
1,038,330✔
1235
        else {
1,419✔
1236
            // If the list only stores duplicates we are free to branch and
1,419✔
1237
            // and create a sub index with this existing list as one of the
3,834✔
1238
            // leafs, but if the list doesn't only contain duplicates we
3,834✔
1239
            // must respect that we store a common key prefix up to this
1,422✔
1240
            // point and insert into the existing list.
3,831✔
1241
            ObjKey key_of_any_dup = ObjKey(sub.get(0));
3,930✔
1242
            StringConversionBuffer buffer;
2,289✔
1243
            StringData index_data_2 = m_target_column.is_fulltext() ? reconstruct_string(offset, key, index_data)
2,289✔
1244
                                                                    : get(key_of_any_dup).get_index_data(buffer);
2,964✔
1245
            if (index_data == index_data_2 || suboffset > s_max_offset) {
3,279✔
1246
                insert_to_existing_list(obj_key, value, sub);
2,538✔
1247
            }
2,634✔
1248
            else {
1,707✔
1249
#ifdef REALM_DEBUG
1,707✔
1250
                bool contains_only_duplicates = true;
1,707✔
1251
                if (!m_target_column.is_fulltext() && sub.size() > 1) {
2,022✔
1252
                    ObjKey first_key = ObjKey(sub.get(0));
372✔
1253
                    ObjKey last_key = ObjKey(sub.back());
372✔
1254
                    auto first = get(first_key);
372✔
1255
                    auto last = get(last_key);
1,653✔
1256
                    // Since the list is kept in sorted order, the first and
372✔
1257
                    // last values will be the same only if the whole list is
372✔
1258
                    // storing duplicate values.
1,653✔
1259
                    if (first != last) {
1,917✔
1260
                        contains_only_duplicates = false; // LCOV_EXCL_LINE
1,545✔
1261
                    }
1262
                }
1,917✔
1263
                REALM_ASSERT_DEBUG(contains_only_duplicates);
1,971✔
1264
#endif
1,971✔
1265
                // The buffer is needed for when this is an integer index.
1,971✔
1266
                StringIndex subindex(m_target_column, m_array->get_alloc());
1,971✔
1267
                subindex.insert_row_list(sub.get_ref(), suboffset, index_data_2);
2,841✔
1268
                subindex.insert_with_offset(obj_key, index_data, value, suboffset);
1,039,344✔
1269
                m_array->set(ins_pos_refs, subindex.get_ref());
1,039,344✔
1270
            }
426✔
1271
        }
1,419✔
1272
        return true;
4,077,867✔
1273
    }
4,077,867✔
1274

3,039,690✔
1275
    // The key matches, but there is a subindex here so go down a level in the tree.
6,077,808✔
1276
    StringIndex subindex(ref, m_array.get(), ins_pos_refs, m_target_column, alloc);
6,077,808✔
1277
    subindex.insert_with_offset(obj_key, index_data, value, suboffset);
3,039,690✔
1278

3,039,690✔
1279
    return true;
4,925,415✔
1280
}
4,925,415✔
1281

1,885,725✔
1282
Mixed StringIndex::get(ObjKey key) const
1283
{
1,853,877✔
1284
    return m_target_column.get_value(key);
2,341,347✔
1285
}
2,341,347✔
1286

487,470✔
1287
void StringIndex::erase(ObjKey key)
487,470✔
1288
{
457,488✔
1289
    StringConversionBuffer buffer;
458,682✔
1290
    std::string_view value{(get(key).get_index_data(buffer))};
458,682✔
1291
    if (m_target_column.is_fulltext()) {
458,682✔
1292
        auto words = Tokenizer::get_instance()->reset(value).get_all_tokens();
72✔
1293
        for (auto& w : words) {
488,664✔
1294
            erase_string(key, w);
488,664✔
1295
        }
488,664✔
1296
    }
487,506✔
1297
    else {
457,416✔
1298
        erase_string(key, value);
457,416✔
1299
    }
457,416✔
1300
}
457,452✔
1301

138✔
1302
namespace {
138✔
1303
template <typename T>
90✔
1304
void intersect(std::vector<ObjKey>& result, T& keys)
249✔
1305
{
387✔
1306
    if (result.empty()) {
387✔
1307
        result.reserve(keys.size());
180✔
1308
        for (auto k : keys) {
297✔
1309
            result.emplace_back(k);
297✔
1310
        }
297✔
1311
    }
138✔
1312
    else {
48✔
1313
        auto it = result.begin();
48✔
1314
        auto keep = it;
225✔
1315
        auto m = keys.begin();
177✔
1316

177✔
1317
        // only keep intersection
57✔
1318
        while (it != result.end() && m != keys.end()) {
186✔
1319
            int64_t int_val = *m;
249✔
1320
            if (it->value < int_val) {
180✔
1321
                it++; // don't keep if match is not in new set
60✔
1322
            }
78✔
1323
            else if (it->value > int_val) {
120✔
1324
                ++m; // ignore new matches
120✔
1325
            }
54✔
1326
            else {
138✔
1327
                // Found both places - make sure it is kept
138✔
1328
                if (keep < it)
138✔
1329
                    *keep = *it;
72✔
1330
                ++keep;
198✔
1331
                ++it;
117✔
1332
                ++m;
81✔
1333
            }
81✔
1334
        }
177✔
1335
        if (keep != result.end()) {
186✔
1336
            result.erase(keep, result.end());
12✔
1337
        }
12✔
1338
    }
48✔
1339
}
138✔
1340

1341
struct FindResWrapper {
1342
    InternalFindResult& res;
72✔
1343
    IntegerColumn& indexes;
72✔
1344
    size_t n = 0;
72✔
1345
    size_t size()
1346
    {
186✔
1347
        return res.end_ndx - res.start_ndx;
186✔
1348
    }
186✔
1349
    auto begin()
1350
    {
306✔
1351
        return indexes.cbegin();
306✔
1352
    }
306✔
1353
    auto end()
1354
    {
192✔
1355
        return indexes.cend();
192✔
1356
    }
192✔
1357
};
180✔
1358
} // namespace
180✔
1359

180✔
1360
void StringIndex::find_all_fulltext(std::vector<ObjKey>& result, StringData value) const
1361
{
360✔
1362
    InternalFindResult res;
360✔
1363
    REALM_ASSERT(result.empty());
360✔
1364

360✔
1365
    auto tokenizer = Tokenizer::get_instance();
192✔
1366
    tokenizer->reset({value.data(), value.size()});
183✔
1367
    auto [includes, excludes] = tokenizer->get_search_tokens();
183✔
1368
    if (includes.empty()) {
189✔
1369
        if (excludes.empty()) {
21✔
1370
            throw InvalidArgument("Missing search token");
171✔
1371
        }
210✔
1372
        result = m_target_column.get_all_keys();
216✔
1373
    }
33✔
1374
    else {
192✔
1375
        for (auto& token : includes) {
231✔
1376
            if (token.back() == '*') {
231✔
1377
                std::set<int64_t> keys;
207✔
1378
                m_array->index_string_find_all_prefix(keys, StringData(token.data(), token.size() - 1));
207✔
1379
                intersect(result, keys);
30✔
1380
            }
30✔
1381
            else {
189✔
1382
                switch (find_all_no_copy(StringData{token}, res)) {
297✔
1383
                    case FindRes_not_found:
120✔
1384
                        result.clear();
120✔
1385
                        break;
120✔
1386
                    case FindRes_column: {
228✔
1387
                        IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
114✔
1388
                        FindResWrapper wrapper{res, indexes};
177✔
1389
                        intersect(result, wrapper);
114✔
1390
                        break;
177✔
1391
                    }
48✔
1392
                    case FindRes_single:
111✔
1393
                        // merge in single res
78✔
1394
                        if (result.empty()) {
78✔
1395
                            result.emplace_back(res.payload);
63✔
1396
                        }
63✔
1397
                        else {
30✔
1398
                            ObjKey key(res.payload);
30✔
1399
                            auto pos = std::lower_bound(result.begin(), result.end(), key);
30✔
1400
                            if (pos != result.end() && key == *pos) {
15✔
1401
                                result.clear();
15✔
1402
                                result.push_back(key);
15✔
1403
                            }
30✔
1404
                            else {
63✔
1405
                                result.clear();
207✔
1406
                            }
207✔
1407
                        }
222✔
1408
                        break;
72✔
1409
                }
414✔
1410
            }
375✔
1411
            if (result.empty())
207✔
1412
                return;
177✔
1413
        }
255✔
1414
    }
168✔
1415

180✔
1416
    for (auto& token : excludes) {
228✔
1417
        if (token.back() == '*') {
48✔
1418
            throw IllegalOperation("Exclude by prefix is not implemented");
1419
        }
48✔
1420
        if (result.empty())
48✔
1421
            return;
1422

48✔
1423
        switch (find_all_no_copy(StringData{token}, res)) {
78✔
1424
            case FindRes_not_found:
30✔
1425
                // Nothing to exclude
1426
                break;
30✔
1427
            case FindRes_column: {
60✔
1428
                IntegerColumn indexes(m_array->get_alloc(), ref_type(res.payload));
60✔
1429

60✔
1430
                auto it = result.begin();
30✔
1431
                auto keep = it;
225✔
1432
                size_t m = res.start_ndx;
195✔
1433
                auto idx_val = indexes.get(m);
30✔
1434

108✔
1435
                while (it != result.end()) {
273✔
1436
                    if (it->value < idx_val) {
243✔
1437
                        // Not found in excludes
156✔
1438
                        if (keep < it)
156✔
1439
                            *keep = *it;
165✔
1440
                        ++keep;
165✔
1441
                        ++it;
78✔
1442
                    }
153✔
1443
                    else {
162✔
1444
                        if (it->value == idx_val) {
174✔
1445
                            // found in excludes - don't keep
141✔
1446
                            ++it;
162✔
1447
                        }
240✔
1448
                        ++m;
117✔
1449
                        idx_val = m < res.end_ndx ? indexes.get(m) : std::numeric_limits<int64_t>::max();
117✔
1450
                    }
117✔
1451
                }
195✔
1452
                if (keep != result.end()) {
30✔
1453
                    result.erase(keep, result.end());
48✔
1454
                }
30✔
1455
                break;
48✔
1456
            }
18✔
1457
            case FindRes_single: {
36✔
1458
                // exclude single res
36✔
1459
                ObjKey key(res.payload);
36✔
1460
                auto pos = std::lower_bound(result.begin(), result.end(), key);
36✔
1461
                if (pos != result.end() && key == *pos) {
18✔
1462
                    result.erase(pos);
66✔
1463
                }
66✔
1464
                break;
186✔
1465
            }
1466
        }
48✔
1467
    }
48✔
1468
}
1,959✔
1469

1,791✔
1470

1,791✔
1471
void StringIndex::clear()
1,791✔
1472
{
474✔
1473
    Array values(m_array->get_alloc());
2,265✔
1474
    get_child(*m_array, 0, values);
2,265✔
1475
    REALM_ASSERT(m_array->size() == values.size() + 1);
474✔
1476

2,265✔
1477
    values.clear();
2,265✔
1478
    values.ensure_minimum_width(0x7FFFFFFF); // This ensures 31 bits plus a sign bit
474✔
1479

2,265✔
1480
    size_t size = 1;
2,265✔
1481
    m_array->truncate_and_destroy_children(size); // Don't touch `values` array
474✔
1482

474✔
1483
    m_array->set_type(Array::type_HasRefs);
474✔
1484
}
639,153✔
1485

638,679✔
1486

638,679✔
1487
void StringIndex::do_delete(ObjKey obj_key, StringData index_data, size_t offset)
638,679✔
1488
{
1,223,514✔
1489
    Allocator& alloc = m_array->get_alloc();
584,835✔
1490
    Array values(alloc);
584,835✔
1491
    get_child(*m_array, 0, values);
1,223,514✔
1492
    REALM_ASSERT(m_array->size() == values.size() + 1);
584,835✔
1493

1,223,514✔
1494
    // Create 4 byte index key
1,223,514✔
1495
    key_type key = create_key(index_data, offset);
1,223,514✔
1496

584,835✔
1497
    const size_t pos = values.lower_bound_int(key);
1,223,514✔
1498
    const size_t pos_refs = pos + 1; // first entry in refs points to offsets
617,340✔
1499
    REALM_ASSERT(pos != values.size());
617,340✔
1500

617,340✔
1501
    if (m_array->is_inner_bptree_node()) {
584,835✔
1502
        ref_type ref = m_array->get_as_ref(pos_refs);
2,442✔
1503
        StringIndex node(ref, m_array.get(), pos_refs, m_target_column, alloc);
34,947✔
1504
        node.do_delete(obj_key, index_data, offset);
2,496✔
1505

2,496✔
1506
        // Update the ref
2,496✔
1507
        if (node.is_empty()) {
2,496✔
1508
            values.erase(pos);
32,451✔
1509
            m_array->erase(pos_refs);
32,451✔
1510
            node.destroy();
32,451✔
1511
        }
54✔
1512
        else {
34,893✔
1513
            key_type max_val = node.get_last_key();
34,947✔
1514
            if (max_val != key_type(values.get(pos)))
608,616✔
1515
                values.set(pos, max_val);
606,174✔
1516
        }
608,616✔
1517
    }
314,736✔
1518
    else {
894,687✔
1519
        uint64_t ref = m_array->get(pos_refs);
894,687✔
1520
        if (ref & 1) {
894,687✔
1521
            REALM_ASSERT(int64_t(ref >> 1) == obj_key.value);
574,056✔
1522
            values.erase(pos);
280,176✔
1523
            m_array->erase(pos_refs);
574,056✔
1524
        }
574,056✔
1525
        else {
419,721✔
1526
            // A real ref either points to a list or a subindex
419,721✔
1527
            char* header = alloc.translate(ref_type(ref));
302,217✔
1528
            if (Array::get_context_flag_from_header(header)) {
419,721✔
1529
                StringIndex subindex(ref_type(ref), m_array.get(), pos_refs, m_target_column, alloc);
130,800✔
1530
                subindex.do_delete(obj_key, index_data, offset + s_index_key_length);
130,800✔
1531

130,800✔
1532
                if (subindex.is_empty()) {
130,800✔
1533
                    values.erase(pos);
123,138✔
1534
                    m_array->erase(pos_refs);
182,010✔
1535
                    subindex.destroy();
182,010✔
1536
                }
182,010✔
1537
            }
300,081✔
1538
            else {
354,888✔
1539
                IntegerColumn sub(alloc, ref_type(ref)); // Throws
354,888✔
1540
                sub.set_parent(m_array.get(), pos_refs);
354,888✔
1541
                size_t r = sub.find_first(obj_key.value);
178,512✔
1542
                size_t sub_size = sub.size(); // Slow
354,888✔
1543
                REALM_ASSERT_EX(r != sub_size, r, sub_size);
180,012✔
1544
                sub.erase(r);
180,012✔
1545

180,012✔
1546
                if (sub_size == 1) {
180,012✔
1547
                    values.erase(pos);
178,020✔
1548
                    m_array->erase(pos_refs);
295,524✔
1549
                    sub.destroy();
607,818✔
1550
                }
640,323✔
1551
            }
178,512✔
1552
        }
302,217✔
1553
    }
1,071,105✔
1554
}
1,073,547✔
1555

1556
void StringIndex::erase_string(ObjKey key, StringData value)
1557
{
947,424✔
1558
    do_delete(key, value, 0);
491,214✔
1559

491,214✔
1560
    // Collapse top nodes with single item
491,211✔
1561
    while (m_array->is_inner_bptree_node()) {
458,709✔
1562
        REALM_ASSERT(m_array->size() > 1); // node cannot be empty
2,445✔
1563
        if (m_array->size() > 2)
2,445✔
1564
            break;
2,445✔
1565

3✔
1566
        ref_type ref = m_array->get_as_ref(1);
3✔
1567
        m_array->set(1, 1); // avoid destruction of the extracted ref
3✔
1568
        m_array->destroy_deep();
488,712✔
1569
        m_array->init_from_ref(ref);
1570
        m_array->update_parent();
1571
    }
1572
}
458,709✔
1573

3,183✔
1574
namespace {
3,183✔
1575

3,183✔
1576
bool has_duplicate_values(const Array& node, const ClusterColumn& target_col) noexcept
3,183✔
1577
{
6,366✔
1578
    Allocator& alloc = node.get_alloc();
6,366✔
1579
    Array child(alloc);
3,183✔
1580
    size_t n = node.size();
3,183!
1581
    REALM_ASSERT(n >= 1);
3,183✔
1582
    if (node.is_inner_bptree_node()) {
3,183✔
1583
        // Inner node
×
1584
        for (size_t i = 1; i < n; ++i) {
×
1585
            ref_type ref = node.get_as_ref(i);
×
1586
            child.init_from_ref(ref);
×
1587
            if (has_duplicate_values(child, target_col))
3,183✔
1588
                return true;
1589
        }
1590
        return false;
16,425✔
1591
    }
17,553✔
1592

17,553✔
1593
    // Leaf node
17,553✔
1594
    for (size_t i = 1; i < n; ++i) {
28,065✔
1595
        int_fast64_t value = node.get(i);
14,370✔
1596
        bool is_single_row_index = (value & 1) != 0;
17,100✔
1597
        if (is_single_row_index)
17,100✔
1598
            continue;
11,640✔
1599

5,460✔
1600
        ref_type ref = to_ref(value);
5,460✔
1601
        child.init_from_ref(ref);
5,166✔
1602

3,630✔
1603
        bool is_subindex = child.get_context_flag();
4,266✔
1604
        if (is_subindex) {
4,266✔
1605
            if (has_duplicate_values(child, target_col))
2,436✔
1606
                return true;
900✔
1607
            continue;
1,830✔
1608
        }
1,830✔
1609

534✔
1610
        // Child is root of B+-tree of row indexes
534✔
1611
        IntegerColumn sub(alloc, ref);
534✔
1612
        if (sub.size() > 1) {
534✔
1613
            ObjKey first_key = ObjKey(sub.get(0));
240✔
1614
            ObjKey last_key = ObjKey(sub.back());
240✔
1615
            Mixed first = target_col.get_value(first_key);
240✔
1616
            Mixed last = target_col.get_value(last_key);
480✔
1617
            // Since the list is kept in sorted order, the first and
456✔
1618
            // last values will be the same only if the whole list is
456✔
1619
            // storing duplicate values.
240✔
1620
            if (first == last) {
240✔
1621
                return true;
240✔
1622
            }
240✔
1623
            // There may also be several short lists combined, so we need to
48✔
1624
            // check each of these individually for duplicates.
96✔
1625
            IntegerColumn::const_iterator it = sub.cbegin();
84✔
1626
            IntegerColumn::const_iterator it_end = sub.cend();
84✔
1627
            SortedListComparator slc(target_col);
84✔
1628
            while (it != it_end) {
132✔
1629
                Mixed it_data = target_col.get_value(ObjKey(*it));
72✔
1630
                IntegerColumn::const_iterator next = std::upper_bound(it, it_end, it_data, slc);
72✔
1631
                size_t count_of_value = next - it; // row index subtraction in `sub`
108✔
1632
                if (count_of_value > 1) {
108✔
1633
                    return true;
36✔
1634
                }
306✔
1635
                it = next;
48✔
1636
            }
2,103✔
1637
        }
3,207✔
1638
    }
294✔
1639

3,183✔
1640
    return false;
3,183✔
1641
}
3,183✔
1642

1643
} // anonymous namespace
747✔
1644

747✔
1645

747✔
1646
bool StringIndex::has_duplicate_values() const noexcept
1647
{
747✔
1648
    return ::has_duplicate_values(*m_array, m_target_column);
747✔
1649
}
150,837✔
1650

150,090✔
1651

150,090✔
1652
bool StringIndex::is_empty() const
1653
{
126,225✔
1654
    return m_array->size() == 1; // first entry in refs points to offsets
1,364,619✔
1655
}
1,364,619✔
1656

1,238,394✔
1657
template <>
1658
void StringIndex::insert<StringData>(ObjKey key, StringData value)
1,238,394✔
1659
{
295,359✔
1660
    StringConversionBuffer buffer;
295,359✔
1661

295,728✔
1662
    if (this->m_target_column.is_fulltext()) {
295,728✔
1663
        auto words = Tokenizer::get_instance()->reset(std::string_view(value)).get_all_tokens();
567✔
1664

567✔
1665
        for (auto& word : words) {
567✔
1666
            Mixed m(word);
567✔
1667
            insert_with_offset(key, m.get_index_data(buffer), m, 0); // Throws
1,238,763✔
1668
        }
1,238,763✔
1669
    }
1,238,394✔
1670
    else {
1,533,555✔
1671
        Mixed m(value);
295,161✔
1672
        insert_with_offset(key, m.get_index_data(buffer), m, 0); // Throws
295,161✔
1673
    }
626,595✔
1674
}
626,694✔
1675

331,434✔
1676
template <>
1677
void StringIndex::set<StringData>(ObjKey key, StringData new_value)
331,434✔
1678
{
219,813✔
1679
    StringConversionBuffer buffer;
219,813✔
1680
    Mixed old_value = get(key);
219,813✔
1681
    Mixed new_value2 = Mixed(new_value);
219,729✔
1682

219,813✔
1683
    if (this->m_target_column.is_fulltext()) {
219,732✔
1684
        auto tokenizer = Tokenizer::get_instance();
87✔
1685
        StringData old_string = old_value.get_index_data(buffer);
87✔
1686
        std::set<std::string> old_words;
168✔
1687

168✔
1688
        if (old_string.size() > 0) {
168✔
1689
            tokenizer->reset({old_string.data(), old_string.size()});
87✔
1690
            old_words = tokenizer->get_all_tokens();
3✔
1691
        }
87✔
1692

168✔
1693
        tokenizer->reset({new_value.data(), new_value.size()});
84✔
1694
        auto new_words = tokenizer->get_all_tokens();
84✔
1695

84✔
1696
        auto w1 = old_words.begin();
354✔
1697
        auto w2 = new_words.begin();
270✔
1698

147✔
1699
        // Do a diff, deleting words no longer present and
147✔
1700
        // inserting new words
147✔
1701
        while (w1 != old_words.end() && w2 != new_words.end()) {
393✔
1702
            if (*w1 < *w2) {
285✔
1703
                erase_string(key, *w1);
162✔
1704
                ++w1;
162✔
1705
            }
162✔
1706
            else if (*w2 < *w1) {
147✔
1707
                Mixed m(*w2);
123✔
1708
                insert_with_offset(key, m.get_index_data(buffer), m, 0);
123✔
1709
                ++w2;
123✔
1710
            }
285✔
1711
            else {
108✔
1712
                ++w1;
24✔
1713
                ++w2;
24✔
1714
            }
24✔
1715
        }
1,863✔
1716
        while (w1 != old_words.end()) {
1,677✔
1717
            erase_string(key, *w1);
1,593✔
1718
            ++w1;
1719
        }
1,593✔
1720
        while (w2 != new_words.end()) {
3,270✔
1721
            Mixed m(*w2);
1,677✔
1722
            insert_with_offset(key, m.get_index_data(buffer), m, 0);
332,943✔
1723

332,943✔
1724
            ++w2;
1,593✔
1725
        }
1,593✔
1726
    }
316,671✔
1727
    else {
219,645✔
1728
        if (REALM_LIKELY(new_value2 != old_value)) {
536,232✔
1729
            // We must erase this row first because erase uses find_first which
532,989✔
1730
            // might find the duplicate if we insert before erasing.
532,989✔
1731
            erase(key); // Throws
547,752✔
1732

547,836✔
1733
            auto index_data = new_value2.get_index_data(buffer);
216,402✔
1734
            insert_with_offset(key, index_data, new_value2, 0); // Throws
216,402✔
1735
        }
216,480✔
1736
    }
219,723✔
1737
}
219,807✔
1738

1739
void StringIndex::node_add_key(ref_type ref)
78✔
1740
{
138✔
1741
    REALM_ASSERT(ref);
138✔
1742
    REALM_ASSERT(m_array->is_inner_bptree_node());
138✔
1743

138✔
1744
    Allocator& alloc = m_array->get_alloc();
60✔
1745
    Array offsets(alloc);
138✔
1746
    get_child(*m_array, 0, offsets);
138✔
1747
    REALM_ASSERT(m_array->size() == offsets.size() + 1);
138✔
1748
    REALM_ASSERT(offsets.size() < REALM_MAX_BPNODE_SIZE + 1);
138✔
1749

138✔
1750
    Array new_top(alloc);
60✔
1751
    Array new_offsets(alloc);
138✔
1752
    new_top.init_from_ref(ref);
138✔
1753
    new_offsets.init_from_ref(new_top.get_as_ref(0));
138✔
1754
    REALM_ASSERT(!new_offsets.is_empty());
138✔
1755

60✔
1756
    int64_t key = new_offsets.back();
60✔
1757
    offsets.add(key);
60✔
1758
    m_array->add(ref);
10,223,778✔
1759
}
10,223,778✔
1760

10,223,718✔
1761
// Must return true if value of object(key) is less than needle.
336✔
1762
bool SortedListComparator::operator()(int64_t key_value, Mixed needle) // used in lower_bound
10,223,382✔
1763
{
10,211,211✔
1764
    Mixed a = m_column.get_value(ObjKey(key_value));
20,434,581✔
1765
    if (a.is_null() && !needle.is_null())
10,384,815✔
1766
        return true;
363✔
1767
    else if (needle.is_null() && !a.is_null())
20,260,608✔
1768
        return false;
10,045,644✔
1769
    else if (a.is_null() && needle.is_null())
10,210,842✔
1770
        return false;
178,863✔
1771

10,031,979✔
1772
    if (a == needle)
10,031,979✔
1773
        return false;
10,024,191✔
1774

11,910✔
1775
    // The Mixed::operator< uses a lexicograpical comparison, therefore we
11,910✔
1776
    // cannot use our utf8_compare here because we need to be consistent with
7,788✔
1777
    // using the same compare method as how these strings were they were put
7,788✔
1778
    // into this ordered column in the first place.
7,788✔
1779
    return a.compare_signed(needle) < 0;
7,788✔
1780
}
9,176,751✔
1781

9,168,963✔
1782

9,173,787✔
1783
// Must return true if value of needle is less than value of object(key).
9,173,787✔
1784
bool SortedListComparator::operator()(Mixed needle, int64_t key_value) // used in upper_bound
9,173,787✔
1785
{
2,156,644,342✔
1786
    Mixed a = m_column.get_value(ObjKey(key_value));
2,156,644,342✔
1787
    if (needle == a) {
9,161,412✔
1788
        return false;
9,161,412✔
1789
    }
9,161,412✔
1790
    return !(*this)(key_value, needle);
2,147,483,647✔
1791
}
2,147,483,647✔
1792

1793
// LCOV_EXCL_START ignore debug functions
1794

1795

1796
void StringIndex::verify() const
1797
{
660✔
1798
#ifdef REALM_DEBUG
1,320✔
1799
    m_array->verify();
1,320✔
1800

1,320✔
1801
    Allocator& alloc = m_array->get_alloc();
660✔
1802
    const size_t array_size = m_array->size();
1,320✔
1803

1,320✔
1804
    // Get first matching row for every key
660✔
1805
    if (m_array->is_inner_bptree_node()) {
660✔
1806
        for (size_t i = 1; i < array_size; ++i) {
660✔
1807
            size_t ref = m_array->get_as_ref(i);
×
1808
            StringIndex ndx(ref, nullptr, 0, m_target_column, alloc);
×
1809
            ndx.verify();
×
1810
        }
×
1811
    }
×
1812
    else {
660✔
1813
        size_t column_size = m_target_column.size();
1,320✔
1814
        for (size_t i = 1; i < array_size; ++i) {
2,028✔
1815
            int64_t ref = m_array->get(i);
2,076✔
1816

1,416✔
1817
            // low bit set indicate literal ref (shifted)
708✔
1818
            if (ref & 1) {
708✔
1819
                size_t r = to_size_t((uint64_t(ref) >> 1));
756✔
1820
                REALM_ASSERT_EX(r < column_size, r, column_size);
96✔
1821
            }
96✔
1822
            else {
708✔
1823
                // A real ref either points to a list or a subindex
1,320✔
1824
                char* header = alloc.translate(to_ref(ref));
660✔
1825
                if (Array::get_context_flag_from_header(header)) {
1,320✔
1826
                    StringIndex ndx(to_ref(ref), m_array.get(), i, m_target_column, alloc);
1,308✔
1827
                    ndx.verify();
1,296✔
1828
                }
1,296✔
1829
                else {
660✔
1830
                    IntegerColumn sub(alloc, to_ref(ref)); // Throws
24✔
1831
                    IntegerColumn::const_iterator it = sub.cbegin();
24✔
1832
                    IntegerColumn::const_iterator it_end = sub.cend();
24✔
1833
                    SortedListComparator slc(m_target_column);
24✔
1834
                    Mixed previous = get(ObjKey(*it));
24✔
1835
                    size_t last_row = to_size_t(*it);
24✔
1836

24✔
1837
                    // Check that strings listed in sub are in sorted order
12✔
1838
                    // and if there are duplicates, that the row numbers are
12✔
1839
                    // sorted in the group of duplicates.
12✔
1840
                    while (it != it_end) {
48✔
1841
                        Mixed it_data = get(ObjKey(*it));
84✔
1842
                        size_t it_row = to_size_t(*it);
72✔
1843
                        REALM_ASSERT(previous.compare_signed(it_data) <= 0);
72✔
1844
                        if (it != sub.cbegin() && previous == it_data) {
72✔
1845
                            REALM_ASSERT_EX(it_row > last_row, it_row, last_row);
36✔
1846
                        }
×
1847
                        last_row = it_row;
36✔
1848
                        previous = get(ObjKey(*it));
72✔
1849
                        ++it;
72✔
1850
                    }
72✔
1851
                }
48✔
1852
            }
672✔
1853
        }
1,368✔
1854
    }
1,368✔
1855
// FIXME: Extend verification along the lines of IntegerColumn::verify().
1,320✔
1856
#endif
660✔
1857
}
1,320✔
1858

660✔
1859
#ifdef REALM_DEBUG
1860

1861
template <class T>
1862
void StringIndex::verify_entries(const ClusterColumn& column) const
1863
{
1864
    std::vector<ObjKey> results;
1865

1866
    auto it = column.begin();
1867
    auto end = column.end();
1868
    auto col = column.get_column_key();
1869
    while (it != end) {
1870
        ObjKey key = it->get_key();
1871
        T value = it->get<T>(col);
1872

1873
        find_all(results, value);
1874

1875
        auto ndx = find(results.begin(), results.end(), key);
1876
        REALM_ASSERT(ndx != results.end());
1877
        size_t found = count(value);
1878
        REALM_ASSERT_EX(found >= 1, found);
1879
        results.clear();
1880
    }
1881
}
1882

1883
namespace {
1884

1885
void out_hex(std::ostream& out, uint64_t val)
1886
{
1887
    if (val) {
1888
        out_hex(out, val >> 8);
NEW
1889
        if (char c = val & 0xFF) {
×
NEW
1890
            out << c;
×
NEW
1891
        }
×
NEW
1892
    }
×
NEW
1893
}
×
1894

×
1895
} // namespace
1896

1897
void StringIndex::dump_node_structure(const Array& node, std::ostream& out, int level)
NEW
1898
{
×
NEW
1899
    int indent = level * 2;
×
1900
    Allocator& alloc = node.get_alloc();
1901
    Array subnode(alloc);
1902

×
1903
    size_t node_size = node.size();
×
NEW
1904
    REALM_ASSERT(node_size >= 1);
×
NEW
1905

×
NEW
1906
    out << std::hex;
×
1907

×
1908
    bool node_is_leaf = !node.is_inner_bptree_node();
×
1909
    if (node_is_leaf) {
×
1910
        out << std::setw(indent) << ""
×
1911
            << "Leaf (B+ tree) (ref: " << node.get_ref() << ")\n";
1912
    }
NEW
1913
    else {
×
NEW
1914
        out << std::setw(indent) << ""
×
NEW
1915
            << "Inner node (B+ tree) (ref: " << node.get_ref() << ")\n";
×
NEW
1916
    }
×
NEW
1917

×
NEW
1918
    subnode.init_from_ref(to_ref(node.front()));
×
NEW
1919
    out << std::setw(indent) << ""
×
NEW
1920
        << "  Keys (keys_ref: " << subnode.get_ref() << ", ";
×
1921
    if (subnode.is_empty()) {
1922
        out << "no keys";
1923
    }
1924
    else {
1925
        out << "keys: ";
1926
        for (size_t i = 0; i != subnode.size(); ++i) {
1927
            if (i != 0)
×
1928
                out << ", ";
×
1929
            out_hex(out, subnode.get(i));
×
1930
        }
×
1931
    }
1932
    out << ")\n";
×
1933

×
1934
    if (node_is_leaf) {
1935
        for (size_t i = 1; i != node_size; ++i) {
×
1936
            int_fast64_t value = node.get(i);
1937
            bool is_single_row_index = (value & 1) != 0;
×
1938
            if (is_single_row_index) {
×
1939
                out << std::setw(indent) << ""
×
1940
                    << "  Single row index (value: " << (value / 2) << ")\n";
×
1941
                continue;
×
1942
            }
×
1943
            subnode.init_from_ref(to_ref(value));
×
1944
            bool is_subindex = subnode.get_context_flag();
×
1945
            if (is_subindex) {
×
1946
                out << std::setw(indent) << ""
1947
                    << "  Subindex\n";
×
1948
                dump_node_structure(subnode, out, level + 2);
×
1949
                continue;
×
1950
            }
×
1951
            IntegerColumn indexes(alloc, to_ref(value));
×
1952
            out << std::setw(indent) << ""
×
1953
                << "  List of row indexes\n";
×
1954
            indexes.dump_values(out, level + 2);
×
1955
        }
×
1956
        return;
×
1957
    }
×
NEW
1958

×
1959

×
1960
    size_t num_children = node_size - 1;
×
1961
    size_t child_ref_begin = 1;
×
1962
    size_t child_ref_end = 1 + num_children;
1963
    for (size_t i = child_ref_begin; i != child_ref_end; ++i) {
×
1964
        subnode.init_from_ref(node.get_as_ref(i));
×
1965
        dump_node_structure(subnode, out, level + 1);
×
1966
    }
×
1967
}
×
1968

1969

1970
void StringIndex::do_dump_node_structure(std::ostream& out, int level) const
1971
{
×
1972
    dump_node_structure(*m_array, out, level);
×
1973
}
×
1974

×
1975
#endif // LCOV_EXCL_STOP ignore debug functions
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc