• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / 2408

11 Jun 2024 12:22PM UTC coverage: 90.962% (+0.01%) from 90.951%
2408

push

Evergreen

web-flow
Optimize Query::between for integers and timestamps (#7785)

* Add benchmark test for QueryRange<Timestamp>

102188 of 180430 branches covered (56.64%)

122 of 123 new or added lines in 9 files covered. (99.19%)

43 existing lines in 7 files now uncovered.

214700 of 236033 relevant lines covered (90.96%)

5818036.06 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.21
/src/realm/query_engine.hpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
/*
20
A query consists of node objects, one for each query condition. Each node contains pointers to all other nodes:
21

22
node1        node2         node3
23
------       -----         -----
24
node2*       node1*        node1*
25
node3*       node3*        node2*
26

27
The construction of all this takes part in query.cpp. Each node has two important functions:
28

29
    aggregate(start, end)
30
    aggregate_local(start, end)
31

32
The aggregate() function executes the aggregate of a query. You can call the method on any of the nodes
33
(except children nodes of OrNode and SubtableNode) - it has the same behaviour. The function contains
34
scheduling that calls aggregate_local(start, end) on different nodes with different start/end ranges,
35
depending on what it finds is most optimal.
36

37
The aggregate_local() function contains a tight loop that tests the condition of its own node, and upon match
38
it tests all other conditions at that index to report a full match or not. It will remain in the tight loop
39
after a full match.
40

41
So a call stack with 2 and 9 being local matches of a node could look like this:
42

43
aggregate(0, 10)
44
    node1->aggregate_local(0, 3)
45
        node2->find_first_local(2, 3)
46
        node3->find_first_local(2, 3)
47
    node3->aggregate_local(3, 10)
48
        node1->find_first_local(4, 5)
49
        node2->find_first_local(4, 5)
50
        node1->find_first_local(7, 8)
51
        node2->find_first_local(7, 8)
52

53
find_first_local(n, n + 1) is a function that can be used to test a single row of another condition. Note that
54
this is very simplified. There are other statistical arguments to the methods, and also, find_first_local() can be
55
called from a callback function called by an integer Array.
56

57

58
Template arguments in methods:
59
----------------------------------------------------------------------------------------------------
60

61
TConditionFunction: Each node has a condition from query_conditions.c such as Equal, GreaterEqual, etc
62

63
TConditionValue:    Type of values in condition column. That is, int64_t, float, int, bool, etc
64
*/
65

66
#ifndef REALM_QUERY_ENGINE_HPP
67
#define REALM_QUERY_ENGINE_HPP
68

69
#include <algorithm>
70
#include <array>
71
#include <functional>
72
#include <sstream>
73
#include <string>
74
#include <typeindex>
75

76
#include <realm/array_backlink.hpp>
77
#include <realm/array_basic.hpp>
78
#include <realm/array_binary.hpp>
79
#include <realm/array_bool.hpp>
80
#include <realm/array_decimal128.hpp>
81
#include <realm/array_fixed_bytes.hpp>
82
#include <realm/array_key.hpp>
83
#include <realm/array_list.hpp>
84
#include <realm/array_mixed.hpp>
85
#include <realm/array_string.hpp>
86
#include <realm/array_timestamp.hpp>
87
#include <realm/column_integer.hpp>
88
#include <realm/column_type_traits.hpp>
89
#include <realm/index_string.hpp>
90
#include <realm/query_conditions.hpp>
91
#include <realm/query_expression.hpp>
92
#include <realm/table.hpp>
93
#include <realm/unicode.hpp>
94
#include <realm/util/flat_map.hpp>
95
#include <realm/util/serializer.hpp>
96
#include <realm/utilities.hpp>
97

98
#include <map>
99
#include <unordered_set>
100

101
#if REALM_X86_OR_X64_TRUE && defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219
102
#include <immintrin.h>
103
#endif
104

105
namespace realm {
106

107
class IndexEvaluator;
108

109
class ParentNode {
110
    typedef ParentNode ThisType;
111

112
public:
113
    ParentNode() = default;
2,418,756✔
114
    virtual ~ParentNode() = default;
5,558,409✔
115

116
    virtual bool has_search_index() const
117
    {
19,956✔
118
        return false;
19,956✔
119
    }
19,956✔
120
    virtual const IndexEvaluator* index_based_keys()
121
    {
305,427✔
122
        return nullptr;
305,427✔
123
    }
305,427✔
124

125
    void gather_children(std::vector<ParentNode*>& v)
126
    {
2,220,969✔
127
        m_children.clear();
2,220,969✔
128
        size_t i = v.size();
2,220,969✔
129
        v.push_back(this);
2,220,969✔
130

131
        if (m_child)
2,220,969✔
132
            m_child->gather_children(v);
23,628✔
133

134
        m_children = v;
2,220,969✔
135
        m_children.erase(m_children.begin() + i);
2,220,969✔
136
        m_children.insert(m_children.begin(), this);
2,220,969✔
137
    }
2,220,969✔
138

139
    double cost() const
140
    {
7,154,607✔
141
        constexpr size_t bitwidth_time_unit = 64;
7,154,607✔
142
        // dt = 1/64 to 1. Match dist is 8 times more important than bitwidth
143
        return 8 * bitwidth_time_unit / m_dD + m_dT;
7,154,607✔
144
    }
7,154,607✔
145

146
    size_t find_first(size_t start, size_t end);
147

148
    bool match(const Obj& obj);
149

150
    virtual void init(bool will_query_ranges)
151
    {
2,214,393✔
152
        m_dD = 100.0;
2,214,393✔
153

154
        if (m_child)
2,214,393✔
155
            m_child->init(will_query_ranges);
23,628✔
156
    }
2,214,393✔
157

158
    void get_link_dependencies(std::vector<TableKey>& tables) const
159
    {
1,629,165✔
160
        collect_dependencies(tables);
1,629,165✔
161
        if (m_child)
1,629,165✔
162
            m_child->get_link_dependencies(tables);
24,102✔
163
    }
1,629,165✔
164

165
    void set_table(ConstTableRef table)
166
    {
3,248,064✔
167
        if (table == m_table)
3,248,064✔
168
            return;
734,853✔
169

170
        m_table = table;
2,513,211✔
171
        if (m_child)
2,513,211✔
172
            m_child->set_table(table);
3,918✔
173
        table_changed();
2,513,211✔
174
    }
2,513,211✔
175

176
    void set_cluster(const Cluster* cluster)
177
    {
7,867,290✔
178
        m_cluster = cluster;
7,867,290✔
179
        if (m_child)
7,867,290✔
180
            m_child->set_cluster(cluster);
122,493✔
181
        cluster_changed();
7,867,290✔
182
    }
7,867,290✔
183

184
    virtual void collect_dependencies(std::vector<TableKey>&) const {}
1,625,418✔
185

186
    virtual size_t find_first_local(size_t start, size_t end) = 0;
187
    virtual size_t find_all_local(size_t start, size_t end);
188

189
    virtual size_t aggregate_local(QueryStateBase* st, size_t start, size_t end, size_t local_limit,
190
                                   ArrayPayload* source_column);
191

192
    virtual std::string validate()
193
    {
6✔
194
        return m_child ? m_child->validate() : "";
6✔
195
    }
6✔
196

197
    ParentNode(const ParentNode& from);
198

199
    void add_child(std::unique_ptr<ParentNode> child)
200
    {
23,490✔
201
        if (m_child)
23,490✔
202
            m_child->add_child(std::move(child));
648✔
203
        else
22,842✔
204
            m_child = std::move(child);
22,842✔
205
    }
23,490✔
206

207
    virtual std::unique_ptr<ParentNode> clone() const = 0;
208

209
    virtual std::string describe(util::serializer::SerialisationState&) const
210
    {
×
211
        return "";
×
212
    }
×
213

214
    virtual std::string describe_condition() const
215
    {
×
216
        return "matches";
×
217
    }
×
218

219
    virtual std::string describe_expression(util::serializer::SerialisationState& state) const
220
    {
48,540✔
221
        std::string s;
48,540✔
222
        s = describe(state);
48,540✔
223
        if (m_child) {
48,540✔
224
            s = s + " and " + m_child->describe_expression(state);
864✔
225
        }
864✔
226
        return s;
48,540✔
227
    }
48,540✔
228

229
    bool consume_condition(ParentNode& other, bool ignore_indexes, std::optional<size_t> num_identical_conditions)
230
    {
708,552✔
231
        // We can only combine conditions if they're the same operator on the
232
        // same column and there's no additional conditions ANDed on
233
        if (m_condition_column_key != other.m_condition_column_key)
708,552✔
234
            return false;
3,270✔
235
        if (m_child || other.m_child)
705,282✔
236
            return false;
222✔
237
        if (typeid(*this) != typeid(other))
705,060✔
238
            return false;
36✔
239

240
        // If a search index is present, don't try to combine conditions since index search is most likely faster.
241
        // Assuming N elements to search and M conditions to check:
242
        // 1) search index present:                     O(log(N)*M)
243
        // 2) no search index, combine conditions:      O(N)
244
        // 3) no search index, conditions not combined: O(N*M)
245
        // In practice N is much larger than M, so if we have a search index, choose 1, otherwise if possible
246
        // choose 2. The exception is if we're inside a Not group or if the query is restricted to a view, as in those
247
        // cases end will always be start+1 and we'll have O(N*M) runtime even with a search index, so we want to
248
        // combine even with an index.
249
        //
250
        // If the column is not a string type, then as the number of conditions increases, the cost of using the index
251
        // for each condition rises faster than using a hash set to check if each value in a leaf matches a condition.
252
        // So if there are _many_ conditions (as defined below), combine conditions even if an index is present.
253
        if (has_search_index() && !ignore_indexes &&
705,024✔
254
            (m_condition_column_key.get_type() == col_type_String || !num_identical_conditions ||
705,024✔
255
             *num_identical_conditions < c_threshold_of_conditions_overwhelming_index))
647,082✔
256
            return false;
214,245✔
257
        return do_consume_condition(other);
490,779✔
258
    }
705,024✔
259

260
    constexpr static size_t c_threshold_of_conditions_overwhelming_index = 100;
261
    bool num_conditions_may_need_combination_counts(size_t num_total_conditions)
262
    {
27,192✔
263
        return num_total_conditions >= c_threshold_of_conditions_overwhelming_index;
27,192✔
264
    }
27,192✔
265

266
    std::unique_ptr<ParentNode> m_child;
267
    std::vector<ParentNode*> m_children;
268
    mutable ColKey m_condition_column_key = ColKey(); // Column of search criteria
269

270
    double m_dD;       // Average row distance between each local match at current position
271
    double m_dT = 1.0; // Time overhead of testing index i + 1 if we have just tested index i. > 1 for linear scans, 0
272
    // for index/tableview
273

274
    size_t m_probes = 0;
275
    size_t m_matches = 0;
276

277
protected:
278
    ConstTableRef m_table = ConstTableRef();
279
    const Cluster* m_cluster = nullptr;
280
    QueryStateBase* m_state = nullptr;
281

282
    ColumnType get_real_column_type(ColKey key)
283
    {
×
284
        return m_table.unchecked_ptr()->get_real_column_type(key);
×
285
    }
×
286

287
private:
288
    virtual void table_changed() {}
2,363,433✔
289
    virtual void cluster_changed()
290
    {
×
291
        // TODO: Should eventually be pure
292
    }
×
293
    virtual bool do_consume_condition(ParentNode&)
294
    {
32,070✔
295
        return false;
32,070✔
296
    }
32,070✔
297
};
298

299

300
class ColumnNodeBase : public ParentNode {
301
protected:
302
    ColumnNodeBase(ColKey column_key)
303
    {
1,641,540✔
304
        m_condition_column_key = column_key;
1,641,540✔
305
    }
1,641,540✔
306

307
    ColumnNodeBase(const ColumnNodeBase& from)
308
        : ParentNode(from)
875,394✔
309
    {
1,729,905✔
310
    }
1,729,905✔
311
};
312

313
class IndexEvaluator {
314
public:
315
    void init(SearchIndex* index, Mixed value);
316
    void init(std::vector<ObjKey>* storage);
317

318
    size_t do_search_index(const Cluster* cluster, size_t start, size_t end);
319

320
    size_t size() const
321
    {
13,098✔
322
        if (m_matching_keys) {
13,098✔
323
            return m_matching_keys->size();
444✔
324
        }
444✔
325
        return m_results_end - m_results_start;
12,654✔
326
    }
13,098✔
327
    ObjKey get(size_t ndx) const
328
    {
265,497✔
329
        return get_internal(ndx + m_results_start);
265,497✔
330
    }
265,497✔
331

332
private:
333
    ObjKey get_internal(size_t ndx) const
334
    {
72,407,565✔
335
        if (m_matching_keys) {
72,407,565✔
336
            return m_matching_keys->at(ndx);
606✔
337
        }
606✔
338
        if (m_index_matches) {
72,406,959✔
339
            return ObjKey(m_index_matches->get(ndx));
72,398,028✔
340
        }
72,398,028✔
341
        else if (m_results_end == 1) {
9,309✔
342
            REALM_ASSERT_EX(ndx == 0, ndx);
9,309✔
343
            return m_actual_key;
9,309✔
344
        }
9,309✔
345
        return ObjKey();
4,294,967,294✔
346
    }
72,406,959✔
347

348
    std::shared_ptr<IntegerColumn> m_index_matches;
349
    ObjKey m_actual_key;
350
    ObjKey m_last_start_key;
351
    size_t m_results_start = 0;
352
    size_t m_results_ndx = 0;
353
    size_t m_results_end = 0;
354

355
    std::vector<ObjKey>* m_matching_keys = nullptr;
356
};
357

358
template <class LeafType>
359
class IntegerNodeBase : public ColumnNodeBase {
360
public:
361
    using TConditionValue = typename LeafType::value_type;
362

363
protected:
364
    IntegerNodeBase(TConditionValue value, ColKey column_key)
365
        : ColumnNodeBase(column_key)
836,535✔
366
        , m_value(std::move(value))
824,571✔
367
    {
1,654,854✔
368
    }
1,654,854✔
369

370
    IntegerNodeBase(const IntegerNodeBase& from)
371
        : ColumnNodeBase(from)
875,949✔
372
        , m_value(from.m_value)
848,352✔
373
    {
1,728,303✔
374
    }
1,728,303✔
375

376
    void cluster_changed() override
377
    {
6,946,248✔
378
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
6,946,248✔
379
        m_cluster->init_leaf(this->m_condition_column_key, &*m_leaf);
6,946,248✔
380
    }
6,946,248✔
381

382
    void init(bool will_query_ranges) override
383
    {
1,617,768✔
384
        ColumnNodeBase::init(will_query_ranges);
1,617,768✔
385

386
        m_dT = .25;
1,617,768✔
387
    }
1,617,768✔
388

389
    template <class TConditionFunction>
390
    size_t find_all_local(size_t start, size_t end)
391
    {
6,007,260✔
392
        m_leaf->template find<TConditionFunction>(m_value, start, end, m_state);
6,007,260✔
393
        return end;
6,007,260✔
394
    }
6,007,260✔
395

396
    std::string describe(util::serializer::SerialisationState& state) const override
397
    {
732✔
398
        return state.describe_column(ParentNode::m_table, ColumnNodeBase::m_condition_column_key) + " " +
732✔
399
               describe_condition() + " " + util::serializer::print_value(this->m_value);
732✔
400
    }
732✔
401

402
    // Search value:
403
    TConditionValue m_value;
404

405
    // Leaf cache
406
    std::optional<LeafType> m_leaf;
407
};
408

409

410
template <class LeafType>
411
class BetweenNode : public ColumnNodeBase {
412
public:
413
    using TConditionValue = typename util::RemoveOptional<typename LeafType::value_type>::type;
414

415
    BetweenNode(TConditionValue from, TConditionValue to, ColKey column_key)
416
        : ColumnNodeBase(column_key)
258✔
417
        , m_from(std::move(from))
240✔
418
        , m_to(std::move(to))
240✔
419
    {
516✔
420
        if (is_null(from) || is_null(to))
516✔
NEW
421
            throw InvalidArgument("'from' or 'to' must not be null");
×
422
    }
516✔
423

424
    BetweenNode(const BetweenNode& from)
425
        : ColumnNodeBase(from)
243✔
426
        , m_from(from.m_from)
237✔
427
        , m_to(from.m_to)
237✔
428
    {
486✔
429
    }
486✔
430

431
    void cluster_changed() override
432
    {
678✔
433
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
678✔
434
        m_cluster->init_leaf(this->m_condition_column_key, &*m_leaf);
678✔
435
    }
678✔
436

437
    void init(bool will_query_ranges) override
438
    {
648✔
439
        ColumnNodeBase::init(will_query_ranges);
648✔
440

441
        m_dT = .25;
648✔
442
    }
648✔
443

444
    size_t find_first_local(size_t start, size_t end) override
445
    {
8,964✔
446
        return m_leaf->find_first_in_range(m_from, m_to, start, end);
8,964✔
447
    }
8,964✔
448

449
    std::string describe(util::serializer::SerialisationState& state) const override
450
    {
78✔
451
        return state.describe_column(ParentNode::m_table, ColumnNodeBase::m_condition_column_key) + " between {" +
78✔
452
               util::serializer::print_value(this->m_from) + ", " + util::serializer::print_value(this->m_to) + "}";
78✔
453
    }
78✔
454

455
    std::unique_ptr<ParentNode> clone() const override
456
    {
486✔
457
        return std::unique_ptr<ParentNode>(new BetweenNode(*this));
486✔
458
    }
486✔
459

460
private:
461
    // Search values:
462
    TConditionValue m_from;
463
    TConditionValue m_to;
464

465
    // Leaf cache
466
    std::optional<LeafType> m_leaf;
467
};
468

469

470
template <class LeafType, class TConditionFunction>
471
class IntegerNode : public IntegerNodeBase<LeafType> {
472
    using BaseType = IntegerNodeBase<LeafType>;
473
    using ThisType = IntegerNode<LeafType, TConditionFunction>;
474

475
public:
476
    using TConditionValue = typename BaseType::TConditionValue;
477

478
    IntegerNode(TConditionValue value, ColKey column_key)
479
        : BaseType(value, column_key)
35,973✔
480
    {
76,050✔
481
    }
76,050✔
482
    IntegerNode(const IntegerNode& from)
483
        : BaseType(from)
57,510✔
484
    {
123,507✔
485
    }
123,507✔
486

487
    size_t find_first_local(size_t start, size_t end) override
488
    {
205,812✔
489
        return this->m_leaf->template find_first<TConditionFunction>(this->m_value, start, end);
205,812✔
490
    }
205,812✔
491

492
    size_t find_all_local(size_t start, size_t end) override
493
    {
398,682✔
494
        return BaseType::template find_all_local<TConditionFunction>(start, end);
398,682✔
495
    }
398,682✔
496

497
    std::string describe_condition() const override
498
    {
732✔
499
        return TConditionFunction::description();
732✔
500
    }
732✔
501

502
    std::unique_ptr<ParentNode> clone() const override
503
    {
123,504✔
504
        return std::unique_ptr<ParentNode>(new ThisType(*this));
123,504✔
505
    }
123,504✔
506
};
507

508
template <size_t linear_search_threshold, class LeafType, class NeedleContainer>
509
static size_t find_first_haystack(LeafType& leaf, NeedleContainer& needles, size_t start, size_t end)
510
{
1,652,295✔
511
    // for a small number of conditions, it is faster to do a linear search than to compute the hash
512
    // the exact thresholds were found experimentally
513
    if (needles.size() < linear_search_threshold) {
1,652,295✔
514
        for (size_t i = start; i < end; ++i) {
2,451,435✔
515
            auto element = leaf.get(i);
2,191,617✔
516
            if (std::find(needles.begin(), needles.end(), element) != needles.end())
2,191,617✔
517
                return i;
1,334,793✔
518
        }
2,191,617✔
519
    }
1,594,611✔
520
    else {
57,684✔
521
        for (size_t i = start; i < end; ++i) {
57,747✔
522
            auto element = leaf.get(i);
57,684✔
523
            if (needles.count(element))
57,684✔
524
                return i;
57,621✔
525
        }
57,684✔
526
    }
57,684✔
527
    return realm::npos;
259,881✔
528
}
1,652,295✔
529

530
template <size_t linear_search_threshold, class LeafType, class NeedleContainer>
531
static size_t find_all_haystack(LeafType& leaf, NeedleContainer& needles, size_t start, size_t end,
532
                                QueryStateBase* state)
533
{
474✔
534
    if (needles.size() < linear_search_threshold) {
474✔
535
        for (size_t i = start; i < end; ++i) {
16,932✔
536
            auto element = leaf.get(i);
16,638✔
537
            if (std::find(needles.begin(), needles.end(), element) != needles.end())
16,638✔
538
                state->match(i);
654✔
539
        }
16,638✔
540
    }
294✔
541
    else {
180✔
542
        for (size_t i = start; i < end; ++i) {
43,428✔
543
            auto element = leaf.get(i);
43,248✔
544
            if (needles.count(element))
43,248✔
545
                state->match(i);
43,206✔
546
        }
43,248✔
547
    }
180✔
548
    return end;
474✔
549
}
474✔
550

551
template <class LeafType>
552
class IntegerNode<LeafType, Equal> : public IntegerNodeBase<LeafType> {
553
public:
554
    using BaseType = IntegerNodeBase<LeafType>;
555
    using TConditionValue = typename BaseType::TConditionValue;
556
    using ThisType = IntegerNode<LeafType, Equal>;
557

558
    IntegerNode(TConditionValue value, ColKey column_key)
559
        : BaseType(value, column_key)
709,875✔
560
    {
1,487,256✔
561
    }
1,487,256✔
562
    IntegerNode(ColKey col, const Mixed* begin, const Mixed* end)
563
        : BaseType(realm::npos, col)
237✔
564
    {
462✔
565
        static_assert(is_any_v<TConditionValue, std::optional<int64_t>, int64_t>, "unexpected type change");
462✔
566
        for (const Mixed* it = begin; it != end; ++it) {
14,589✔
567
            if constexpr (std::is_same_v<TConditionValue, std::optional<int64_t>>) {
14,124✔
568
                if (it->is_null()) {
8,628✔
569
                    m_needles.insert(std::nullopt);
6✔
570
                    continue;
6✔
571
                }
6✔
572
            }
8,628✔
573
            if (const int64_t* val = it->get_if<int64_t>()) {
14,121✔
574
                m_needles.insert(*val);
14,040✔
575
            }
14,040✔
576
            else if (const double* val = it->get_if<double>()) {
81✔
577
                // JS encodes numbers as double
578
                // only add this value if it represents an integer
579
                if (*val == double(int64_t(*val))) {
18✔
580
                    m_needles.insert(int64_t(*val));
×
581
                }
×
582
            }
18✔
583
        }
11,379✔
584
    }
462✔
585

586
    void init(bool will_query_ranges) override
587
    {
1,525,359✔
588
        BaseType::init(will_query_ranges);
1,525,359✔
589
        m_nb_needles = m_needles.size();
1,525,359✔
590

591
        if (has_search_index() && m_nb_needles == 0) {
1,525,359✔
592
            SearchIndex* index = ParentNode::m_table->get_search_index(ParentNode::m_condition_column_key);
9,912✔
593
            m_index_evaluator = IndexEvaluator();
9,912✔
594
            m_index_evaluator->init(index, BaseType::m_value);
9,912✔
595
            IntegerNodeBase<LeafType>::m_dT = 0;
9,912✔
596
        }
9,912✔
597
    }
1,525,359✔
598

599
    bool do_consume_condition(ParentNode& node) override
600
    {
30,414✔
601
        auto& other = static_cast<ThisType&>(node);
30,414✔
602
        REALM_ASSERT(this->m_condition_column_key == other.m_condition_column_key);
30,414✔
603
        if (m_needles.empty()) {
30,414✔
604
            m_needles.insert(this->m_value);
24,402✔
605
        }
24,402✔
606
        if (other.m_needles.empty()) {
30,414✔
607
            m_needles.insert(other.m_value);
30,402✔
608
        }
30,402✔
609
        else {
12✔
610
            for (const auto& val : other.m_needles) {
12✔
611
                m_needles.insert(val);
12✔
612
            }
12✔
613
        }
12✔
614
        return true;
30,414✔
615
    }
30,414✔
616

617
    bool has_search_index() const override
618
    {
1,576,194✔
619
        return this->m_table->search_index_type(IntegerNodeBase<LeafType>::m_condition_column_key) ==
1,576,194✔
620
               IndexType::General;
1,576,194✔
621
    }
1,576,194✔
622

623
    const IndexEvaluator* index_based_keys() override
624
    {
1,540,209✔
625
        return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
1,540,209✔
626
    }
1,540,209✔
627

628
    size_t find_first_local(size_t start, size_t end) override
629
    {
5,449,929✔
630
        REALM_ASSERT(this->m_table);
5,449,929✔
631
        size_t s = realm::npos;
5,449,929✔
632

633
        if (start < end) {
5,449,929✔
634
            if (m_nb_needles) {
5,445,930✔
635
                s = find_first_haystack<22>(*this->m_leaf, m_needles, start, end);
1,629,099✔
636
            }
1,629,099✔
637
            else if (m_index_evaluator) {
3,816,831✔
638
                return m_index_evaluator->do_search_index(BaseType::m_cluster, start, end);
1,764✔
639
            }
1,764✔
640
            else {
3,815,067✔
641
                s = this->m_leaf->template find_first<Equal>(this->m_value, start, end);
3,815,067✔
642
            }
3,815,067✔
643
        }
5,445,930✔
644

645
        return s;
5,448,165✔
646
    }
5,449,929✔
647

648
    size_t find_all_local(size_t start, size_t end) override
649
    {
6,111,981✔
650
        if (m_nb_needles) {
6,111,981✔
651
            return find_all_haystack<22>(*this->m_leaf, m_needles, start, end, ParentNode::m_state);
474✔
652
        }
474✔
653
        return BaseType::template find_all_local<Equal>(start, end);
6,111,507✔
654
    }
6,111,981✔
655

656
    std::string describe(util::serializer::SerialisationState& state) const override
657
    {
12,942✔
658
        REALM_ASSERT(this->m_condition_column_key);
12,942✔
659
        std::string col_descr = state.describe_column(this->m_table, this->m_condition_column_key);
12,942✔
660

661
        if (m_needles.empty()) {
12,942✔
662
            return col_descr + " " + Equal::description() + " " +
12,768✔
663
                   util::serializer::print_value(IntegerNodeBase<LeafType>::m_value);
12,768✔
664
        }
12,768✔
665

666
        std::string list_contents;
174✔
667
        bool is_first = true;
174✔
668
        for (auto it : m_needles) {
378✔
669
            list_contents +=
378✔
670
                util::format("%1%2", is_first ? "" : ", ", util::serializer::print_value(it)); // "it" may be null
378✔
671
            is_first = false;
378✔
672
        }
378✔
673
        std::string desc = util::format("%1 IN {%2}", col_descr, list_contents);
174✔
674
        return desc;
174✔
675
    }
12,942✔
676

677
    std::unique_ptr<ParentNode> clone() const override
678
    {
1,463,115✔
679
        return std::unique_ptr<ParentNode>(new ThisType(*this));
1,463,115✔
680
    }
1,463,115✔
681

682
private:
683
    std::unordered_set<TConditionValue> m_needles;
684
    size_t m_nb_needles = 0;
685
    std::optional<IndexEvaluator> m_index_evaluator;
686

687
    IntegerNode(const IntegerNode<LeafType, Equal>& from)
688
        : BaseType(from)
659,247✔
689
        , m_needles(from.m_needles)
659,247✔
690
    {
1,444,734✔
691
    }
1,444,734✔
692
};
693

694

695
// This node is currently used for floats and doubles only
696
template <class LeafType, class TConditionFunction>
697
class FloatDoubleNode : public ParentNode {
698
public:
699
    using TConditionValue = typename LeafType::value_type;
700
    static const bool special_null_node = false;
701

702
    FloatDoubleNode(TConditionValue v, ColKey column_key)
703
        : m_value(v)
10,860✔
704
    {
21,720✔
705
        m_condition_column_key = column_key;
21,720✔
706
        m_dT = 1.0;
21,720✔
707
    }
21,720✔
708
    FloatDoubleNode(null, ColKey column_key)
709
        : m_value(null::get_null_float<TConditionValue>())
300✔
710
    {
600✔
711
        m_condition_column_key = column_key;
600✔
712
        m_dT = 1.0;
600✔
713
    }
600✔
714

715
    void cluster_changed() override
716
    {
38,292✔
717
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
38,292✔
718
        m_cluster->init_leaf(this->m_condition_column_key, &*m_leaf);
38,292✔
719
    }
38,292✔
720

721
    size_t find_first_local(size_t start, size_t end) override
722
    {
375,528✔
723
        TConditionFunction cond;
375,528✔
724

725
        auto find = [&](bool nullability) {
375,528✔
726
            bool value_nan = nullability ? null::is_null_float(m_value) : false;
375,528!
727
            for (size_t s = start; s < end; ++s) {
3,086,232!
728
                TConditionValue v = m_leaf->get(s);
3,004,368✔
729
                REALM_ASSERT(!(null::is_null_float(v) && !nullability));
3,004,368!
730
                if (cond(v, m_value, nullability ? null::is_null_float<TConditionValue>(v) : false, value_nan))
3,004,368!
731
                    return s;
293,664✔
732
            }
3,004,368✔
733
            return not_found;
81,864✔
734
        };
375,528✔
735

736
        // This will inline the second case but no the first. Todo, use templated lambda when switching to c++14
737
        if (m_table->is_nullable(m_condition_column_key))
375,528!
738
            return find(true);
20,832✔
739
        else
354,696✔
740
            return find(false);
354,696✔
741
    }
375,528✔
742

743
    std::string describe(util::serializer::SerialisationState& state) const override
744
    {
270✔
745
        REALM_ASSERT(m_condition_column_key);
270!
746
        return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " + describe_condition() + " " +
270✔
747
               util::serializer::print_value(FloatDoubleNode::m_value);
270✔
748
    }
270✔
749
    std::string describe_condition() const override
750
    {
270✔
751
        return TConditionFunction::description();
270✔
752
    }
270✔
753

754
    std::unique_ptr<ParentNode> clone() const override
755
    {
21,210✔
756
        return std::unique_ptr<ParentNode>(new FloatDoubleNode(*this));
21,210✔
757
    }
21,210✔
758

759
    FloatDoubleNode(const FloatDoubleNode& from)
760
        : ParentNode(from)
10,605✔
761
        , m_value(from.m_value)
10,605✔
762
    {
21,210✔
763
    }
21,210✔
764

765
protected:
766
    TConditionValue m_value;
767
    std::optional<LeafType> m_leaf;
768
};
769

770
template <class T, class TConditionFunction>
771
class SizeNode : public ParentNode {
772
public:
773
    SizeNode(int64_t v, ColKey column)
774
        : m_value(v)
6✔
775
    {
12✔
776
        m_condition_column_key = column;
12✔
777
        m_dT = 20.0;
12✔
778
    }
12✔
779

780
    void cluster_changed() override
781
    {
12✔
782
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
12✔
783
        m_cluster->init_leaf(m_condition_column_key, &*m_leaf);
12✔
784
    }
12✔
785

786
    size_t find_first_local(size_t start, size_t end) override
787
    {
18✔
788
        for (size_t s = start; s < end; ++s) {
78!
789
            if (T v = m_leaf->get(s)) {
72!
790
                int64_t sz = v.size();
24✔
791
                if (TConditionFunction()(sz, m_value))
24!
792
                    return s;
12✔
793
            }
24✔
794
        }
72✔
795
        return not_found;
6✔
796
    }
18✔
797

798
    std::unique_ptr<ParentNode> clone() const override
799
    {
18✔
800
        return std::unique_ptr<ParentNode>(new SizeNode(*this));
18✔
801
    }
18✔
802

803
    SizeNode(const SizeNode& from)
804
        : ParentNode(from)
9✔
805
        , m_value(from.m_value)
9✔
806
    {
18✔
807
    }
18✔
808

809
private:
810
    using LeafType = typename ColumnTypeTraits<T>::cluster_leaf_type;
811
    std::optional<LeafType> m_leaf;
812
    int64_t m_value;
813
};
814

815
extern size_t size_of_list_from_ref(ref_type ref, Allocator& alloc, ColumnType col_type, bool nullable);
816

817
template <class TConditionFunction>
818
class SizeListNode : public ParentNode {
819
public:
820
    SizeListNode(int64_t v, ColKey column)
821
        : m_value(v)
72✔
822
    {
144✔
823
        m_condition_column_key = column;
144✔
824
        m_dT = 30.0;
144✔
825
    }
144✔
826

827
    void reset_cache()
828
    {
288✔
829
        m_cached_col_type = m_condition_column_key.get_type();
288✔
830
        m_cached_nullable = m_condition_column_key.is_nullable();
288✔
831
        REALM_ASSERT_DEBUG(m_condition_column_key.is_list());
288!
832
    }
288✔
833

834
    void cluster_changed() override
835
    {
144✔
836
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
144✔
837
        m_cluster->init_leaf(m_condition_column_key, &*m_leaf);
144✔
838
        reset_cache();
144✔
839
    }
144✔
840

841
    void init(bool will_query_ranges) override
842
    {
144✔
843
        ParentNode::init(will_query_ranges);
144✔
844
        reset_cache();
144✔
845
    }
144✔
846

847
    size_t find_first_local(size_t start, size_t end) override
848
    {
204✔
849
        Allocator& alloc = m_table.unchecked_ptr()->get_alloc();
204✔
850
        for (size_t s = start; s < end; ++s) {
294✔
851
            if (ref_type ref = m_leaf->get(s)) {
276✔
852
                int64_t sz = size_of_list_from_ref(ref, alloc, m_cached_col_type, m_cached_nullable);
204✔
853
                if (TConditionFunction()(sz, m_value))
204✔
854
                    return s;
186✔
855
            }
204✔
856
        }
276✔
857
        return not_found;
18✔
858
    }
204✔
859

860
    std::unique_ptr<ParentNode> clone() const override
861
    {
162✔
862
        return std::unique_ptr<ParentNode>(new SizeListNode(*this));
162✔
863
    }
162✔
864

865
    SizeListNode(const SizeListNode& from)
866
        : ParentNode(from)
81✔
867
        , m_value(from.m_value)
81✔
868
    {
162✔
869
    }
162✔
870

871
private:
872
    std::optional<ArrayList> m_leaf;
873

874
    int64_t m_value;
875

876
    ColumnType m_cached_col_type;
877
    bool m_cached_nullable;
878
};
879

880

881
template <class TConditionFunction>
882
class BinaryNode : public ParentNode {
883
public:
884
    using TConditionValue = BinaryData;
885
    static const bool special_null_node = false;
886

887
    BinaryNode(BinaryData v, ColKey column)
888
        : m_value(v)
6,141✔
889
    {
12,282✔
890
        m_condition_column_key = column;
12,282✔
891
        m_dT = 100.0;
12,282✔
892
    }
12,282✔
893

894
    BinaryNode(null, ColKey column)
895
        : BinaryNode(BinaryData{}, column)
201✔
896
    {
402✔
897
    }
402✔
898

899
    void cluster_changed() override
900
    {
17,592✔
901
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
17,592✔
902
        m_cluster->init_leaf(m_condition_column_key, &*m_leaf);
17,592✔
903
    }
17,592✔
904

905
    size_t find_first_local(size_t start, size_t end) override
906
    {
67,020✔
907
        TConditionFunction condition;
67,020✔
908
        for (size_t s = start; s < end; ++s) {
2,540,466!
909
            BinaryData value = m_leaf->get(s);
2,525,232✔
910
            if (condition(m_value.get(), value))
2,525,232!
911
                return s;
51,786✔
912
        }
2,525,232✔
913
        return not_found;
15,234✔
914
    }
67,020✔
915

916
    std::string describe(util::serializer::SerialisationState& state) const override
917
    {
2,286✔
918
        REALM_ASSERT(m_condition_column_key);
2,286!
919
        return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " +
2,286✔
920
               TConditionFunction::description() + " " + util::serializer::print_value(BinaryNode::m_value.get());
2,286✔
921
    }
2,286✔
922

923
    std::unique_ptr<ParentNode> clone() const override
924
    {
14,262✔
925
        return std::unique_ptr<ParentNode>(new BinaryNode(*this));
14,262✔
926
    }
14,262✔
927

928
    BinaryNode(const BinaryNode& from)
929
        : ParentNode(from)
7,131✔
930
        , m_value(from.m_value)
7,131✔
931
    {
14,262✔
932
    }
14,262✔
933

934
private:
935
    OwnedBinaryData m_value;
936
    std::optional<ArrayBinary> m_leaf;
937
};
938

939
template <class TConditionFunction>
940
class BoolNode : public ParentNode {
941
public:
942
    using TConditionValue = bool;
943

944
    BoolNode(util::Optional<bool> v, ColKey column)
945
        : m_value(v)
1,035✔
946
    {
2,070✔
947
        m_condition_column_key = column;
2,070✔
948
    }
2,070✔
949

950
    BoolNode(const BoolNode& from)
951
        : ParentNode(from)
1,008✔
952
        , m_value(from.m_value)
1,008✔
953
        , m_index_evaluator(from.m_index_evaluator)
1,008✔
954
    {
2,016✔
955
    }
2,016✔
956

957
    void init(bool will_query_ranges) override
958
    {
3,126✔
959
        ParentNode::init(will_query_ranges);
3,126✔
960

961
        if constexpr (std::is_same_v<TConditionFunction, Equal>) {
3,126✔
962
            if (m_index_evaluator) {
1,890✔
963
                SearchIndex* index = m_table->get_search_index(m_condition_column_key);
×
964
                m_index_evaluator->init(index, m_value);
×
965
                this->m_dT = 0;
×
966
            }
×
967
        }
1,890✔
968
    }
3,126✔
969

970
    void table_changed() override
971
    {
2,538✔
972
        if constexpr (std::is_same_v<TConditionFunction, Equal>) {
2,538✔
973
            const bool has_index = m_table->search_index_type(m_condition_column_key) == IndexType::General;
1,482✔
974
            m_index_evaluator = has_index ? std::make_optional(IndexEvaluator{}) : std::nullopt;
1,482✔
975
        }
1,482✔
976
    }
2,538✔
977

978
    const IndexEvaluator* index_based_keys() override
979
    {
2,964✔
980
        return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
2,964!
981
    }
2,964✔
982

983
    bool has_search_index() const override
984
    {
60✔
985
        return bool(m_index_evaluator);
60✔
986
    }
60✔
987

988
    void cluster_changed() override
989
    {
3,480✔
990
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
3,480✔
991
        m_cluster->init_leaf(m_condition_column_key, &*m_leaf);
3,480✔
992
    }
3,480✔
993

994
    size_t find_first_local(size_t start, size_t end) override
995
    {
31,236✔
996
        if constexpr (std::is_same_v<TConditionFunction, Equal>) {
31,236✔
997
            if (m_index_evaluator) {
29,964✔
998
                return m_index_evaluator->do_search_index(m_cluster, start, end);
×
999
            }
×
1000
        }
29,964✔
1001

1002
        TConditionFunction condition;
29,964✔
1003
        bool m_value_is_null = !m_value;
30,600✔
1004
        for (size_t s = start; s < end; ++s) {
56,664!
1005
            auto value = m_leaf->get(s);
54,942✔
1006
            if (condition(value, m_value, !value, m_value_is_null))
54,942!
1007
                return s;
29,514✔
1008
        }
54,942✔
1009
        return not_found;
1,722✔
1010
    }
30,600✔
1011

1012
    std::string describe(util::serializer::SerialisationState& state) const override
1013
    {
96✔
1014
        return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " +
96✔
1015
               TConditionFunction::description() + " " + util::serializer::print_value(m_value);
96✔
1016
    }
96✔
1017

1018
    std::unique_ptr<ParentNode> clone() const override
1019
    {
2,016✔
1020
        return std::unique_ptr<ParentNode>(new BoolNode(*this));
2,016✔
1021
    }
2,016✔
1022

1023
private:
1024
    std::optional<bool> m_value;
1025
    std::optional<ArrayBoolNull> m_leaf;
1026
    std::optional<IndexEvaluator> m_index_evaluator;
1027
};
1028

1029
class TimestampNodeBase : public ParentNode {
1030
public:
1031
    using TConditionValue = Timestamp;
1032
    static const bool special_null_node = false;
1033

1034
    TimestampNodeBase(Timestamp v, ColKey column)
1035
        : m_value(v)
6,723✔
1036
    {
13,446✔
1037
        m_condition_column_key = column;
13,446✔
1038
        m_dT = 2.0;
13,446✔
1039
    }
13,446✔
1040

1041
    TimestampNodeBase(null, ColKey column)
1042
        : TimestampNodeBase(Timestamp{}, column)
144✔
1043
    {
288✔
1044
    }
288✔
1045

1046
    void cluster_changed() override
1047
    {
15,810✔
1048
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
15,810✔
1049
        m_cluster->init_leaf(m_condition_column_key, &*m_leaf);
15,810✔
1050
    }
15,810✔
1051

1052
protected:
1053
    TimestampNodeBase(const TimestampNodeBase& from)
1054
        : ParentNode(from)
6,360✔
1055
        , m_value(from.m_value)
6,360✔
1056
    {
12,720✔
1057
    }
12,720✔
1058

1059
    Timestamp m_value;
1060
    std::optional<ArrayTimestamp> m_leaf;
1061
};
1062

1063
template <class TConditionFunction>
1064
class TimestampNode : public TimestampNodeBase {
1065
public:
1066
    using TimestampNodeBase::TimestampNodeBase;
1067

1068
    void init(bool will_query_ranges) override
1069
    {
15,942✔
1070
        TimestampNodeBase::init(will_query_ranges);
15,942✔
1071

1072
        if constexpr (std::is_same_v<TConditionFunction, Equal>) {
15,942✔
1073
            if (m_index_evaluator) {
9,378✔
1074
                SearchIndex* index =
2,592✔
1075
                    TimestampNodeBase::m_table->get_search_index(TimestampNodeBase::m_condition_column_key);
2,592✔
1076
                m_index_evaluator->init(index, TimestampNodeBase::m_value);
2,592✔
1077
                this->m_dT = 0;
2,592✔
1078
            }
2,592✔
1079
        }
9,378✔
1080
    }
15,942✔
1081

1082
    void table_changed() override
1083
    {
14,814✔
1084
        if constexpr (std::is_same_v<TConditionFunction, Equal>) {
14,814✔
1085
            const bool has_index =
9,180✔
1086
                this->m_table->search_index_type(TimestampNodeBase::m_condition_column_key) == IndexType::General;
9,180✔
1087
            m_index_evaluator = has_index ? std::make_optional(IndexEvaluator{}) : std::nullopt;
9,180✔
1088
        }
9,180✔
1089
    }
14,814✔
1090

1091
    const IndexEvaluator* index_based_keys() override
1092
    {
8,436✔
1093
        return m_index_evaluator ? &*m_index_evaluator : nullptr;
8,436✔
1094
    }
8,436✔
1095

1096
    bool has_search_index() const override
1097
    {
7,242✔
1098
        return bool(m_index_evaluator);
7,242✔
1099
    }
7,242✔
1100

1101
    size_t find_first_local(size_t start, size_t end) override
1102
    {
59,040✔
1103
        if constexpr (std::is_same_v<TConditionFunction, Equal>) {
59,040✔
1104
            if (m_index_evaluator) {
21,024✔
1105
                return m_index_evaluator->do_search_index(this->m_cluster, start, end);
4,902✔
1106
            }
4,902✔
1107
        }
21,024✔
1108
        return m_leaf->find_first<TConditionFunction>(m_value, start, end);
16,122✔
1109
    }
59,040✔
1110

1111
    std::string describe(util::serializer::SerialisationState& state) const override
1112
    {
156✔
1113
        REALM_ASSERT(m_condition_column_key);
156!
1114
        return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " +
156✔
1115
               TConditionFunction::description() + " " + util::serializer::print_value(TimestampNode::m_value);
156✔
1116
    }
156✔
1117

1118
    std::unique_ptr<ParentNode> clone() const override
1119
    {
12,720✔
1120
        return std::unique_ptr<ParentNode>(new TimestampNode(*this));
12,720✔
1121
    }
12,720✔
1122

1123
protected:
1124
    std::optional<IndexEvaluator> m_index_evaluator;
1125
};
1126

1127
class DecimalNodeBase : public ParentNode {
1128
public:
1129
    using TConditionValue = Decimal128;
1130
    static const bool special_null_node = false;
1131

1132
    DecimalNodeBase(Decimal128 v, ColKey column)
1133
        : m_value(v)
3,117✔
1134
    {
6,234✔
1135
        m_condition_column_key = column;
6,234✔
1136
    }
6,234✔
1137

1138
    DecimalNodeBase(null, ColKey column)
1139
        : DecimalNodeBase(Decimal128{null()}, column)
6✔
1140
    {
12✔
1141
    }
12✔
1142

1143
    void cluster_changed() override
1144
    {
6,228✔
1145
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
6,228✔
1146
        m_cluster->init_leaf(m_condition_column_key, &*m_leaf);
6,228✔
1147
    }
6,228✔
1148

1149
    void init(bool will_query_ranges) override
1150
    {
6,228✔
1151
        ParentNode::init(will_query_ranges);
6,228✔
1152

1153
        m_dD = 100.0;
6,228✔
1154
    }
6,228✔
1155

1156
protected:
1157
    DecimalNodeBase(const DecimalNodeBase& from)
1158
        : ParentNode(from)
3,780✔
1159
        , m_value(from.m_value)
3,780✔
1160
    {
7,560✔
1161
    }
7,560✔
1162

1163
    Decimal128 m_value;
1164
    std::optional<ArrayDecimal128> m_leaf;
1165
};
1166

1167
template <class TConditionFunction>
1168
class DecimalNode : public DecimalNodeBase {
1169
public:
1170
    using DecimalNodeBase::DecimalNodeBase;
1171

1172
    size_t find_first_local(size_t start, size_t end) override
1173
    {
14,526✔
1174
        TConditionFunction cond;
14,526✔
1175
        bool value_is_null = m_value.is_null();
14,526✔
1176
        for (size_t i = start; i < end; i++) {
1,016,136!
1177
            Decimal128 val = m_leaf->get(i);
1,009,866✔
1178
            if (cond(val, m_value, val.is_null(), value_is_null))
1,009,866!
1179
                return i;
8,256✔
1180
        }
1,009,866✔
1181
        return realm::npos;
6,270✔
1182
    }
14,526✔
1183

1184
    std::string describe(util::serializer::SerialisationState& state) const override
1185
    {
612✔
1186
        REALM_ASSERT(m_condition_column_key);
612!
1187
        return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " +
612✔
1188
               TConditionFunction::description() + " " + util::serializer::print_value(DecimalNode::m_value);
612✔
1189
    }
612✔
1190

1191
    std::unique_ptr<ParentNode> clone() const override
1192
    {
7,560✔
1193
        return std::unique_ptr<ParentNode>(new DecimalNode(*this));
7,560✔
1194
    }
7,560✔
1195
};
1196

1197
template <class ObjectType, class ArrayType>
1198
class FixedBytesNodeBase : public ParentNode {
1199
public:
1200
    using TConditionValue = ObjectType;
1201
    static const bool special_null_node = false;
1202

1203
    FixedBytesNodeBase(ObjectType v, ColKey column)
1204
        : m_value(v)
322,485✔
1205
    {
644,970✔
1206
        m_condition_column_key = column;
644,970✔
1207
    }
644,970✔
1208

1209
    FixedBytesNodeBase(null, ColKey column)
1210
        : FixedBytesNodeBase(ObjectType{}, column)
183✔
1211
    {
366✔
1212
        m_value_is_null = true;
366✔
1213
    }
366✔
1214

1215
    void cluster_changed() override
1216
    {
216,723✔
1217
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
216,723✔
1218
        m_cluster->init_leaf(m_condition_column_key, &*m_leaf);
216,723✔
1219
    }
216,723✔
1220

1221
    void init(bool will_query_ranges) override
1222
    {
216,591✔
1223
        ParentNode::init(will_query_ranges);
216,591✔
1224

1225
        m_dD = 100.0;
216,591✔
1226
    }
216,591✔
1227

1228
protected:
1229
    FixedBytesNodeBase(const FixedBytesNodeBase& from)
1230
        : ParentNode(from)
643,686✔
1231
        , m_value(from.m_value)
643,686✔
1232
        , m_value_is_null(from.m_value_is_null)
643,686✔
1233
    {
1,287,372✔
1234
    }
1,287,372✔
1235

1236
    ObjectType m_value;
1237
    std::optional<ArrayType> m_leaf;
1238
    bool m_value_is_null = false;
1239
};
1240

1241
template <class TConditionFunction, class ObjectType, class ArrayType>
1242
class FixedBytesNode : public FixedBytesNodeBase<ObjectType, ArrayType> {
1243
public:
1244
    using FixedBytesNodeBase<ObjectType, ArrayType>::FixedBytesNodeBase;
1245

1246
    size_t find_first_local(size_t start, size_t end) override
1247
    {
100,812✔
1248
        TConditionFunction cond;
100,812✔
1249
        for (size_t i = start; i < end; i++) {
208,584!
1250
            util::Optional<ObjectType> val = this->m_leaf->get(i);
207,696✔
1251
            if (val) {
207,696!
1252
                if (cond(*val, this->m_value, false, this->m_value_is_null))
172,524!
1253
                    return i;
88,248✔
1254
            }
172,524✔
1255
            else {
35,172✔
1256
                if (cond(ObjectType{}, this->m_value, true, this->m_value_is_null))
35,172!
1257
                    return i;
11,676✔
1258
            }
35,172✔
1259
        }
207,696✔
1260
        return realm::npos;
888✔
1261
    }
100,812✔
1262

1263
    std::string describe(util::serializer::SerialisationState& state) const override
1264
    {
396✔
1265
        REALM_ASSERT(this->m_condition_column_key);
396!
1266
        return state.describe_column(ParentNode::m_table, this->m_condition_column_key) + " " +
396✔
1267
               TConditionFunction::description() + " " +
396✔
1268
               (this->m_value_is_null ? util::serializer::print_value(realm::null())
396!
1269
                                      : util::serializer::print_value(this->m_value));
396✔
1270
    }
396✔
1271

1272
    std::unique_ptr<ParentNode> clone() const override
1273
    {
1,632✔
1274
        return std::unique_ptr<ParentNode>(new FixedBytesNode(*this));
1,632✔
1275
    }
1,632✔
1276
};
1277

1278
template <class ObjectType, class ArrayType>
1279
class FixedBytesNode<Equal, ObjectType, ArrayType> : public FixedBytesNodeBase<ObjectType, ArrayType> {
1280
public:
1281
    using FixedBytesNodeBase<ObjectType, ArrayType>::FixedBytesNodeBase;
1282
    using BaseType = FixedBytesNodeBase<ObjectType, ArrayType>;
1283
    using ThisType = FixedBytesNode<Equal, ObjectType, ArrayType>;
1284

1285
    FixedBytesNode(ColKey col, const Mixed* begin, const Mixed* end)
1286
        : BaseType(realm::null(), col)
144✔
1287
    {
288✔
1288
        REALM_ASSERT(begin && end);
288✔
1289
        REALM_ASSERT(begin != end);
288✔
1290
        for (const Mixed* val = begin; val != end; ++val) {
15,120✔
1291
            if (val->is_null()) {
14,832✔
1292
                m_needles.insert(std::nullopt);
36✔
1293
                continue;
36✔
1294
            }
36✔
1295
            if (const ObjectType* v = val->get_if<ObjectType>()) {
14,796✔
1296
                m_needles.insert({*v});
14,742✔
1297
            }
14,742✔
1298
        }
14,796✔
1299
        if (m_needles.size() == 0) {
288✔
1300
            throw InvalidArgument("No arguments to compare to");
×
1301
        }
×
1302
    }
288✔
1303

1304
    void init(bool will_query_ranges) override
1305
    {
215,595✔
1306
        BaseType::init(will_query_ranges);
215,595✔
1307
        m_nb_needles = m_needles.size();
215,595✔
1308

1309
        if (!this->m_value_is_null) {
215,595✔
1310
            m_optional_value = this->m_value;
215,253✔
1311
        }
215,253✔
1312
        if (m_nb_needles == 0 && has_search_index()) {
215,595✔
1313
            m_index_evaluator = std::make_optional(IndexEvaluator{});
214,827✔
1314
            SearchIndex* index = BaseType::m_table->get_search_index(BaseType::m_condition_column_key);
214,827✔
1315
            m_index_evaluator->init(index, m_optional_value);
214,827✔
1316
            this->m_dT = 0;
214,827✔
1317
        }
214,827✔
1318
    }
215,595✔
1319

1320
    const IndexEvaluator* index_based_keys() override
1321
    {
1,308✔
1322
        return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
1,308✔
1323
    }
1,308✔
1324

1325
    bool has_search_index() const override
1326
    {
857,439✔
1327
        return this->m_table->search_index_type(BaseType::m_condition_column_key) == IndexType::General;
857,439✔
1328
    }
857,439✔
1329

1330
    size_t find_first_local(size_t start, size_t end) override
1331
    {
242,361✔
1332
        REALM_ASSERT(this->m_table);
242,361✔
1333
        size_t s = realm::npos;
242,361✔
1334

1335
        if (start < end) {
242,361✔
1336
            if (m_nb_needles) {
242,361✔
1337
                return find_first_haystack<22>(*this->m_leaf, m_needles, start, end);
14,952✔
1338
            }
14,952✔
1339
            if (m_index_evaluator) {
227,409✔
1340
                return m_index_evaluator->do_search_index(this->m_cluster, start, end);
214,167✔
1341
            }
214,167✔
1342

1343
            if (end - start == 1) {
13,242✔
1344
                if (this->m_leaf->get(start) == m_optional_value) {
138✔
1345
                    s = start;
72✔
1346
                }
72✔
1347
            }
138✔
1348
            else {
13,104✔
1349
                s = this->m_leaf->find_first(m_optional_value, start, end);
13,104✔
1350
            }
13,104✔
1351
        }
13,242✔
1352

1353
        return s;
13,242✔
1354
    }
242,361✔
1355

1356
    bool do_consume_condition(ParentNode& node) override
1357
    {
428,061✔
1358
        auto& other = static_cast<ThisType&>(node);
428,061✔
1359
        REALM_ASSERT(this->m_condition_column_key == other.m_condition_column_key);
428,061✔
1360
        if (m_needles.empty()) {
428,061✔
1361
            m_needles.insert(this->m_value_is_null ? std::nullopt : std::make_optional(this->m_value));
12✔
1362
        }
12✔
1363
        if (other.m_needles.empty()) {
428,061✔
1364
            m_needles.insert(other.m_value_is_null ? std::nullopt : std::make_optional(other.m_value));
428,037✔
1365
        }
428,037✔
1366
        else {
24✔
1367
            for (const auto& val : other.m_needles) {
24✔
1368
                m_needles.insert(val);
24✔
1369
            }
24✔
1370
        }
24✔
1371
        return true;
428,061✔
1372
    }
428,061✔
1373

1374
    std::string describe(util::serializer::SerialisationState& state) const override
1375
    {
660✔
1376
        REALM_ASSERT(this->m_condition_column_key);
660✔
1377
        std::string col_descr = state.describe_column(this->m_table, this->m_condition_column_key);
660✔
1378
        if (m_needles.empty()) {
660✔
1379
            return util::format("%1 %2 %3", col_descr, Equal::description(),
660✔
1380
                                (this->m_value_is_null ? util::serializer::print_value(realm::null())
660✔
1381
                                                       : util::serializer::print_value(this->m_value)));
660✔
1382
        }
660✔
1383
        std::string list_contents;
×
1384
        bool is_first = true;
×
1385
        for (auto it : m_needles) {
×
1386
            list_contents +=
×
1387
                util::format("%1%2", is_first ? "" : ", ", util::serializer::print_value(it)); // "it" may be null
×
1388
            is_first = false;
×
1389
        }
×
1390
        return util::format("%1 IN {%2}", col_descr, list_contents);
×
1391
    }
660✔
1392

1393
    std::unique_ptr<ParentNode> clone() const override
1394
    {
1,285,740✔
1395
        return std::unique_ptr<ParentNode>(new FixedBytesNode(*this));
1,285,740✔
1396
    }
1,285,740✔
1397

1398
protected:
1399
    std::optional<ObjectType> m_optional_value;
1400
    std::optional<IndexEvaluator> m_index_evaluator;
1401
    std::unordered_set<std::optional<ObjectType>> m_needles;
1402
    size_t m_nb_needles = 0;
1403
};
1404

1405

1406
template <typename T>
1407
using ObjectIdNode = FixedBytesNode<T, ObjectId, ArrayObjectIdNull>;
1408
template <typename T>
1409
using UUIDNode = FixedBytesNode<T, UUID, ArrayUUIDNull>;
1410

1411
class MixedNodeBase : public ParentNode {
1412
public:
1413
    using TConditionValue = Mixed;
1414
    static const bool special_null_node = false;
1415

1416
    MixedNodeBase(Mixed v, ColKey column)
1417
        : m_value(v)
3,183✔
1418
        , m_value_is_null(v.is_null())
3,183✔
1419
    {
6,366✔
1420
        REALM_ASSERT(column.get_type() == col_type_Mixed);
6,366✔
1421
        get_ownership();
6,366✔
1422
        m_condition_column_key = column;
6,366✔
1423
    }
6,366✔
1424

1425
    MixedNodeBase(null, ColKey column)
1426
        : MixedNodeBase(Mixed{}, column)
1427
    {
×
1428
        m_value_is_null = true;
×
1429
    }
×
1430

1431
    void cluster_changed() override
1432
    {
3,450✔
1433
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
3,450✔
1434
        m_cluster->init_leaf(m_condition_column_key, &*m_leaf);
3,450✔
1435
    }
3,450✔
1436

1437
    void init(bool will_query_ranges) override
1438
    {
6,378✔
1439
        ParentNode::init(will_query_ranges);
6,378✔
1440

1441
        m_dD = 100.0;
6,378✔
1442
    }
6,378✔
1443

1444
    std::string describe(util::serializer::SerialisationState& state) const override
1445
    {
246✔
1446
        REALM_ASSERT(m_condition_column_key);
246✔
1447
        std::string value;
246✔
1448
        if (m_value.is_type(type_TypedLink)) {
246✔
1449
            value = util::serializer::print_value(m_value.get<ObjLink>(), state.group);
12✔
1450
        }
12✔
1451
        else {
234✔
1452
            value = util::serializer::print_value(m_value);
234✔
1453
        }
234✔
1454
        return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " + this->describe_condition() +
246✔
1455
               " " + value;
246✔
1456
    }
246✔
1457

1458
protected:
1459
    MixedNodeBase(const MixedNodeBase& from)
1460
        : ParentNode(from)
3,390✔
1461
        , m_value(from.m_value)
3,390✔
1462
        , m_value_is_null(from.m_value_is_null)
3,390✔
1463
    {
6,780✔
1464
        get_ownership();
6,780✔
1465
    }
6,780✔
1466

1467
    void get_ownership()
1468
    {
13,146✔
1469
        if (m_value.is_type(type_String, type_Binary)) {
13,146✔
1470
            auto bin = m_value.export_to_type<BinaryData>();
3,036✔
1471
            m_buffer = OwnedBinaryData(bin.data(), bin.size());
3,036✔
1472
            auto tmp = m_buffer.get();
3,036✔
1473
            if (m_value.is_type(type_String)) {
3,036✔
1474
                m_value = Mixed(StringData(tmp.data(), tmp.size()));
2,508✔
1475
            }
2,508✔
1476
            else {
528✔
1477
                m_value = Mixed(tmp);
528✔
1478
            }
528✔
1479
        }
3,036✔
1480
    }
13,146✔
1481

1482
    QueryValue m_value;
1483
    OwnedBinaryData m_buffer;
1484
    std::optional<ArrayMixed> m_leaf;
1485
    bool m_value_is_null = false;
1486
};
1487

1488
template <class TConditionFunction>
1489
class MixedNode : public MixedNodeBase {
1490
public:
1491
    using MixedNodeBase::MixedNodeBase;
1492

1493
    size_t find_first_local(size_t start, size_t end) override
1494
    {
21,300✔
1495
        TConditionFunction cond;
21,300✔
1496
        for (size_t i = start; i < end; i++) {
80,484✔
1497
            QueryValue val(m_leaf->get(i));
69,600✔
1498
            if constexpr (realm::is_any_v<TConditionFunction, BeginsWith, BeginsWithIns, EndsWith, EndsWithIns, Like,
34,800✔
1499
                                          LikeIns, NotEqualIns, Contains, ContainsIns>) {
55,800✔
1500
                // For some strange reason the parameters are swapped for string conditions
1501
                if (cond(m_value, val))
42,000✔
1502
                    return i;
7,704✔
1503
            }
21,000✔
1504
            else {
27,600✔
1505
                if (cond(val, m_value))
27,600✔
1506
                    return i;
13,128✔
1507
            }
27,600✔
1508
        }
69,600✔
1509
        return realm::npos;
10,884✔
1510
    }
21,300✔
1511

1512
    std::string describe_condition() const override
1513
    {
168✔
1514
        return TConditionFunction::description();
168✔
1515
    }
168✔
1516

1517
    std::unique_ptr<ParentNode> clone() const override
1518
    {
1,056✔
1519
        return std::unique_ptr<ParentNode>(new MixedNode(*this));
1,056✔
1520
    }
1,056✔
1521
};
1522

1523
template <>
1524
class MixedNode<Equal> : public MixedNodeBase {
1525
public:
1526
    MixedNode(Mixed v, ColKey column)
1527
        : MixedNodeBase(v, column)
2,787✔
1528
    {
5,574✔
1529
    }
5,574✔
1530
    MixedNode(const MixedNode<Equal>& other)
1531
        : MixedNodeBase(other)
2,796✔
1532
        , m_index_evaluator(other.m_index_evaluator)
2,796✔
1533
    {
5,592✔
1534
    }
5,592✔
1535
    void init(bool will_query_ranges) override;
1536

1537
    void cluster_changed() override
1538
    {
5,178✔
1539
        // If we use searchindex, we do not need further access to clusters
1540
        if (!has_search_index()) {
5,178✔
1541
            MixedNodeBase::cluster_changed();
2,688✔
1542
        }
2,688✔
1543
    }
5,178✔
1544

1545
    void table_changed() override
1546
    {
5,574✔
1547
        const bool has_index =
5,574✔
1548
            m_table.unchecked_ptr()->search_index_type(m_condition_column_key) == IndexType::General;
5,574✔
1549
        m_index_evaluator = has_index ? std::make_optional(IndexEvaluator{}) : std::nullopt;
5,574✔
1550
    }
5,574✔
1551

1552
    bool has_search_index() const override
1553
    {
10,026✔
1554
        return bool(m_index_evaluator);
10,026✔
1555
    }
10,026✔
1556

1557
    size_t find_first_local(size_t start, size_t end) override;
1558

1559
    std::string describe_condition() const override
1560
    {
60✔
1561
        return Equal::description();
60✔
1562
    }
60✔
1563

1564
    std::unique_ptr<ParentNode> clone() const override
1565
    {
5,592✔
1566
        return std::unique_ptr<ParentNode>(new MixedNode<Equal>(*this));
5,592✔
1567
    }
5,592✔
1568

1569
protected:
1570
    std::optional<IndexEvaluator> m_index_evaluator;
1571

1572
    const IndexEvaluator* index_based_keys() override
1573
    {
624✔
1574
        return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
624✔
1575
    }
624✔
1576
};
1577

1578
template <>
1579
class MixedNode<EqualIns> : public MixedNodeBase {
1580
public:
1581
    MixedNode(Mixed v, ColKey column)
1582
        : MixedNodeBase(v, column)
48✔
1583
    {
96✔
1584
    }
96✔
1585
    MixedNode(const MixedNode<EqualIns>& other)
1586
        : MixedNodeBase(other)
66✔
1587
        , m_index_evaluator(other.m_index_evaluator)
66✔
1588
    {
132✔
1589
    }
132✔
1590
    void init(bool will_query_ranges) override;
1591

1592
    size_t find_first_local(size_t start, size_t end) override;
1593

1594
    void cluster_changed() override
1595
    {
66✔
1596
        // If we use searchindex, we do not need further access to clusters
1597
        if (!has_search_index()) {
66✔
1598
            MixedNodeBase::cluster_changed();
66✔
1599
        }
66✔
1600
    }
66✔
1601

1602
    void table_changed() override
1603
    {
96✔
1604
        const bool has_index =
96✔
1605
            m_table.unchecked_ptr()->search_index_type(m_condition_column_key) == IndexType::General;
96✔
1606
        m_index_evaluator = has_index ? std::make_optional(IndexEvaluator{}) : std::nullopt;
96✔
1607
    }
96✔
1608

1609
    bool has_search_index() const override
1610
    {
66✔
1611
        return bool(m_index_evaluator);
66✔
1612
    }
66✔
1613

1614
    std::string describe_condition() const override
1615
    {
18✔
1616
        return EqualIns::description();
18✔
1617
    }
18✔
1618

1619
    std::unique_ptr<ParentNode> clone() const override
1620
    {
132✔
1621
        return std::unique_ptr<ParentNode>(new MixedNode<EqualIns>(*this));
132✔
1622
    }
132✔
1623

1624
protected:
1625
    std::string m_ucase;
1626
    std::string m_lcase;
1627
    std::optional<IndexEvaluator> m_index_evaluator;
1628
    std::vector<ObjKey> m_index_matches;
1629

1630
    const IndexEvaluator* index_based_keys() override
1631
    {
96✔
1632
        return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
96✔
1633
    }
96✔
1634
};
1635

1636
class StringNodeBase : public ParentNode {
1637
public:
1638
    using TConditionValue = StringData;
1639
    static const bool special_null_node = true;
1640

1641
    StringNodeBase(StringData v, ColKey column)
1642
        : m_value(v.is_null() ? util::none : util::make_optional(std::string(v)))
24,006✔
1643
        , m_string_value(m_value)
24,006✔
1644
    {
48,438✔
1645
        m_condition_column_key = column;
48,438✔
1646
        m_dT = 10.0;
48,438✔
1647
    }
48,438✔
1648

1649
    void table_changed() override
1650
    {
51,990✔
1651
        m_is_string_enum = m_table.unchecked_ptr()->is_enumerated(m_condition_column_key);
51,990✔
1652
    }
51,990✔
1653

1654
    void cluster_changed() override
1655
    {
111,732✔
1656
        m_leaf.emplace(m_table.unchecked_ptr()->get_alloc());
111,732✔
1657
        m_cluster->init_leaf(m_condition_column_key, &*m_leaf);
111,732✔
1658
    }
111,732✔
1659

1660
    void init(bool will_query_ranges) override
1661
    {
81,207✔
1662
        ParentNode::init(will_query_ranges);
81,207✔
1663

1664
        m_probes = 0;
81,207✔
1665
        m_matches = 0;
81,207✔
1666
        m_end_s = 0;
81,207✔
1667
        m_leaf_start = 0;
81,207✔
1668
        m_leaf_end = 0;
81,207✔
1669
    }
81,207✔
1670

1671
    virtual void clear_leaf_state()
1672
    {
16,782✔
1673
        m_leaf.reset();
16,782✔
1674
    }
16,782✔
1675

1676
    StringNodeBase(const StringNodeBase& from)
1677
        : ParentNode(from)
28,158✔
1678
        , m_value(from.m_value)
28,158✔
1679
        , m_string_value(m_value)
28,158✔
1680
        , m_is_string_enum(from.m_is_string_enum)
28,158✔
1681
    {
56,745✔
1682
    }
56,745✔
1683

1684
    std::string describe(util::serializer::SerialisationState& state) const override
1685
    {
6,843✔
1686
        REALM_ASSERT(m_condition_column_key);
6,843✔
1687
        return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " + describe_condition() + " " +
6,843✔
1688
               util::serializer::print_value(m_string_value);
6,843✔
1689
    }
6,843✔
1690

1691
protected:
1692
    std::optional<std::string> m_value;
1693
    std::optional<ArrayString> m_leaf;
1694
    StringData m_string_value;
1695

1696
    bool m_is_string_enum = false;
1697

1698
    size_t m_end_s = 0;
1699
    size_t m_leaf_start = 0;
1700
    size_t m_leaf_end = 0;
1701

1702
    StringData get_string(size_t s)
1703
    {
408,564✔
1704
        return m_leaf->get(s);
408,564✔
1705
    }
408,564✔
1706
};
1707

1708
// Conditions for strings. Note that Equal is specialized later in this file!
1709
template <class TConditionFunction>
1710
class StringNode : public StringNodeBase {
1711
public:
1712
    constexpr static bool case_sensitive_comparison =
1713
        is_any_v<TConditionFunction, Greater, GreaterEqual, Less, LessEqual>;
1714
    StringNode(StringData v, ColKey column)
1715
        : StringNodeBase(v, column)
4,728✔
1716
    {
9,456✔
1717
        if constexpr (case_sensitive_comparison) {
9,456✔
1718
            return;
276✔
1719
        }
276✔
1720
        auto upper = case_map(v, true);
×
1721
        auto lower = case_map(v, false);
4,728✔
1722
        if (!upper || !lower) {
9,318✔
1723
            throw InvalidArgument(util::format("Malformed UTF-8: %1", v));
×
1724
        }
×
1725
        else {
9,318✔
1726
            m_ucase = std::move(*upper);
9,318✔
1727
            m_lcase = std::move(*lower);
9,318✔
1728
        }
9,318✔
1729
    }
4,728✔
1730

1731
    void init(bool will_query_ranges) override
1732
    {
13,602✔
1733
        StringNodeBase::init(will_query_ranges);
13,602✔
1734
        clear_leaf_state();
13,602✔
1735
    }
13,602✔
1736

1737
    size_t find_first_local(size_t start, size_t end) override
1738
    {
236,214✔
1739
        TConditionFunction cond;
236,214✔
1740

1741
        for (size_t s = start; s < end; ++s) {
445,428✔
1742
            StringData t = get_string(s);
318,036✔
1743

1744
            if constexpr (case_sensitive_comparison) {
318,036✔
1745
                // case insensitive not implemented for: >, >=, <, <=
1746
                if (cond(t, m_string_value))
154,755✔
1747
                    return s;
720✔
1748
            }
690✔
1749
            else {
316,656✔
1750
                if (cond(m_string_value, m_ucase.c_str(), m_lcase.c_str(), t))
316,656✔
1751
                    return s;
223,062✔
1752
            }
316,656✔
1753
        }
318,036✔
1754
        return not_found;
127,392✔
1755
    }
236,214✔
1756

1757
    std::string describe_condition() const override
1758
    {
264✔
1759
        return TConditionFunction::description();
264✔
1760
    }
264✔
1761

1762
    std::unique_ptr<ParentNode> clone() const override
1763
    {
8,808✔
1764
        return std::unique_ptr<ParentNode>(new StringNode<TConditionFunction>(*this));
8,808✔
1765
    }
8,808✔
1766

1767
    StringNode(const StringNode& from)
1768
        : StringNodeBase(from)
4,404✔
1769
        , m_ucase(from.m_ucase)
4,404✔
1770
        , m_lcase(from.m_lcase)
4,404✔
1771
    {
8,808✔
1772
    }
8,808✔
1773

1774
protected:
1775
    std::string m_ucase;
1776
    std::string m_lcase;
1777
};
1778

1779
// Specialization for Contains condition on Strings - we specialize because we can utilize Boyer-Moore
1780
template <>
1781
class StringNode<Contains> : public StringNodeBase {
1782
public:
1783
    StringNode(StringData v, ColKey column)
1784
        : StringNodeBase(v, column)
603✔
1785
        , m_charmap()
603✔
1786
    {
1,206✔
1787
        if (v.size() == 0)
1,206✔
1788
            return;
1,140✔
1789

1790
        // Build a dictionary of char-to-last distances in the search string
1791
        // (zero indicates that the char is not in needle)
1792
        size_t last_char_pos = v.size() - 1;
66✔
1793
        for (size_t i = 0; i < last_char_pos; ++i) {
3,090✔
1794
            // we never jump longer increments than 255 chars, even if needle is longer (to fit in one byte)
1795
            uint8_t jump = last_char_pos - i < 255 ? static_cast<uint8_t>(last_char_pos - i) : 255;
3,024✔
1796

1797
            unsigned char c = v[i];
3,024✔
1798
            m_charmap[c] = jump;
3,024✔
1799
        }
3,024✔
1800
        m_dT = 50.0;
66✔
1801
    }
66✔
1802

1803
    void init(bool will_query_ranges) override
1804
    {
1,746✔
1805
        StringNodeBase::init(will_query_ranges);
1,746✔
1806
        clear_leaf_state();
1,746✔
1807
    }
1,746✔
1808

1809

1810
    size_t find_first_local(size_t start, size_t end) override
1811
    {
33,174✔
1812
        Contains cond;
33,174✔
1813

1814
        for (size_t s = start; s < end; ++s) {
33,498✔
1815
            StringData t = get_string(s);
33,384✔
1816

1817
            if (cond(m_string_value, m_charmap, t))
33,384✔
1818
                return s;
33,060✔
1819
        }
33,384✔
1820
        return not_found;
114✔
1821
    }
33,174✔
1822

1823
    std::string describe_condition() const override
1824
    {
24✔
1825
        return Contains::description();
24✔
1826
    }
24✔
1827

1828

1829
    std::unique_ptr<ParentNode> clone() const override
1830
    {
1,140✔
1831
        return std::unique_ptr<ParentNode>(new StringNode<Contains>(*this));
1,140✔
1832
    }
1,140✔
1833

1834
    StringNode(const StringNode& from)
1835
        : StringNodeBase(from)
570✔
1836
        , m_charmap(from.m_charmap)
570✔
1837
    {
1,140✔
1838
    }
1,140✔
1839

1840
protected:
1841
    std::array<uint8_t, 256> m_charmap;
1842
};
1843

1844
// Specialization for ContainsIns condition on Strings - we specialize because we can utilize Boyer-Moore
1845
template <>
1846
class StringNode<ContainsIns> : public StringNodeBase {
1847
public:
1848
    StringNode(StringData v, ColKey column)
1849
        : StringNodeBase(v, column)
510✔
1850
        , m_charmap()
510✔
1851
    {
1,020✔
1852
        auto upper = case_map(v, true);
1,020✔
1853
        auto lower = case_map(v, false);
1,020✔
1854
        if (!upper || !lower) {
1,020✔
1855
            throw query_parser::InvalidQueryError(util::format("Malformed UTF-8: %1", v));
×
1856
        }
×
1857
        else {
1,020✔
1858
            m_ucase = std::move(*upper);
1,020✔
1859
            m_lcase = std::move(*lower);
1,020✔
1860
        }
1,020✔
1861

1862
        if (v.size() == 0)
1,020✔
1863
            return;
864✔
1864

1865
        // Build a dictionary of char-to-last distances in the search string
1866
        // (zero indicates that the char is not in needle)
1867
        size_t last_char_pos = m_ucase.size() - 1;
156✔
1868
        for (size_t i = 0; i < last_char_pos; ++i) {
3,474✔
1869
            // we never jump longer increments than 255 chars, even if needle is longer (to fit in one byte)
1870
            uint8_t jump = last_char_pos - i < 255 ? static_cast<uint8_t>(last_char_pos - i) : 255;
3,318✔
1871

1872
            unsigned char uc = m_ucase[i];
3,318✔
1873
            unsigned char lc = m_lcase[i];
3,318✔
1874
            m_charmap[uc] = jump;
3,318✔
1875
            m_charmap[lc] = jump;
3,318✔
1876
        }
3,318✔
1877
        m_dT = 75.0;
156✔
1878
    }
156✔
1879

1880
    void init(bool will_query_ranges) override
1881
    {
1,434✔
1882
        StringNodeBase::init(will_query_ranges);
1,434✔
1883
        clear_leaf_state();
1,434✔
1884
    }
1,434✔
1885

1886

1887
    size_t find_first_local(size_t start, size_t end) override
1888
    {
25,992✔
1889
        ContainsIns cond;
25,992✔
1890

1891
        for (size_t s = start; s < end; ++s) {
32,616✔
1892
            StringData t = get_string(s);
32,208✔
1893
            // The current behaviour is to return all results when querying for a null string.
1894
            // See comment above Query_NextGen_StringConditions on why every string including "" contains null.
1895
            if (!bool(m_value)) {
32,208✔
1896
                return s;
24,738✔
1897
            }
24,738✔
1898
            if (cond(m_string_value, m_ucase.c_str(), m_lcase.c_str(), m_charmap, t))
7,470✔
1899
                return s;
846✔
1900
        }
7,470✔
1901
        return not_found;
408✔
1902
    }
25,992✔
1903

1904
    std::string describe_condition() const override
1905
    {
66✔
1906
        return ContainsIns::description();
66✔
1907
    }
66✔
1908

1909
    std::unique_ptr<ParentNode> clone() const override
1910
    {
1,188✔
1911
        return std::unique_ptr<ParentNode>(new StringNode<ContainsIns>(*this));
1,188✔
1912
    }
1,188✔
1913

1914
    StringNode(const StringNode& from)
1915
        : StringNodeBase(from)
594✔
1916
        , m_charmap(from.m_charmap)
594✔
1917
        , m_ucase(from.m_ucase)
594✔
1918
        , m_lcase(from.m_lcase)
594✔
1919
    {
1,188✔
1920
    }
1,188✔
1921

1922
protected:
1923
    std::array<uint8_t, 256> m_charmap;
1924
    std::string m_ucase;
1925
    std::string m_lcase;
1926
};
1927

1928
class StringNodeEqualBase : public StringNodeBase {
1929
public:
1930
    StringNodeEqualBase(StringData v, ColKey column)
1931
        : StringNodeBase(v, column)
18,165✔
1932
    {
36,756✔
1933
    }
36,756✔
1934
    StringNodeEqualBase(const StringNodeEqualBase& from)
1935
        : StringNodeBase(from)
22,590✔
1936
        , m_index_evaluator(from.m_index_evaluator)
22,590✔
1937
    {
45,609✔
1938
    }
45,609✔
1939

1940
    void init(bool) override;
1941

1942
    bool has_search_index() const override
1943
    {
64,368✔
1944
        return bool(m_table.unchecked_ptr()->search_index_type(m_condition_column_key) == IndexType::General);
64,368✔
1945
    }
64,368✔
1946

1947
    void cluster_changed() override
1948
    {
304,947✔
1949
        // If we use searchindex, we do not need further access to clusters
1950
        if (!m_index_evaluator) {
304,947✔
1951
            StringNodeBase::cluster_changed();
92,928✔
1952
        }
92,928✔
1953
    }
304,947✔
1954

1955
    size_t find_first_local(size_t start, size_t end) override;
1956

1957
    std::string describe_condition() const override
1958
    {
6,477✔
1959
        return Equal::description();
6,477✔
1960
    }
6,477✔
1961

1962
    const IndexEvaluator* index_based_keys() override
1963
    {
41,763✔
1964
        return m_index_evaluator ? &(*m_index_evaluator) : nullptr;
41,763✔
1965
    }
41,763✔
1966

1967
protected:
1968
    std::optional<IndexEvaluator> m_index_evaluator;
1969

1970
    inline BinaryData str_to_bin(const StringData& s) noexcept
1971
    {
×
1972
        return BinaryData(s.data(), s.size());
×
1973
    }
×
1974

1975
    virtual void _search_index_init() = 0;
1976
    virtual size_t _find_first_local(size_t start, size_t end) = 0;
1977
};
1978

1979
// Specialization for Equal condition on Strings - we specialize because we can utilize indexes (if they exist) for
1980
// Equal. This specialisation also supports combining other StringNode<Equal> conditions into itself in order to
1981
// optimise the non-indexed linear search that can be happen when many conditions are OR'd together in an "IN" query.
1982
// Future optimization: make specialization for greater, notequal, etc
1983
template <>
1984
class StringNode<Equal> : public StringNodeEqualBase {
1985
public:
1986
    StringNode(StringData v, ColKey column)
1987
        : StringNodeEqualBase(v, column)
17,358✔
1988
    {
35,154✔
1989
    }
35,154✔
1990
    StringNode(ColKey col, const Mixed* begin, const Mixed* end);
1991

1992
    void _search_index_init() override;
1993

1994
    bool do_consume_condition(ParentNode& other) override;
1995

1996
    std::unique_ptr<ParentNode> clone() const override
1997
    {
44,211✔
1998
        return std::unique_ptr<ParentNode>(new StringNode<Equal>(*this));
44,211✔
1999
    }
44,211✔
2000

2001
    std::string describe(util::serializer::SerialisationState& state) const override;
2002

2003
    StringNode(const StringNode& from)
2004
        : StringNodeEqualBase(from)
21,891✔
2005
    {
44,211✔
2006
        for (auto& needle : from.m_needles) {
44,211✔
2007
            if (needle.is_null()) {
744✔
2008
                m_needles.emplace();
102✔
2009
            }
102✔
2010
            else {
642✔
2011
                m_needle_storage.push_back(std::make_unique<char[]>(needle.size()));
642✔
2012
                std::copy(needle.data(), needle.data() + needle.size(), m_needle_storage.back().get());
642✔
2013
                m_needles.insert(StringData(m_needle_storage.back().get(), needle.size()));
642✔
2014
            }
642✔
2015
        }
744✔
2016
    }
44,211✔
2017

2018
private:
2019
    size_t _find_first_local(size_t start, size_t end) override;
2020
    std::unordered_set<StringData> m_needles;
2021
    std::vector<std::unique_ptr<char[]>> m_needle_storage;
2022
};
2023

2024

2025
// Specialization for EqualIns condition on Strings - we specialize because we can utilize indexes (if they exist) for
2026
// EqualIns.
2027
template <>
2028
class StringNode<EqualIns> : public StringNodeEqualBase {
2029
public:
2030
    StringNode(StringData v, ColKey column)
2031
        : StringNodeEqualBase(v, column)
480✔
2032
    {
960✔
2033
        auto upper = case_map(v, true);
960✔
2034
        auto lower = case_map(v, false);
960✔
2035
        if (!upper || !lower) {
960✔
2036
            throw query_parser::InvalidQueryError(util::format("Malformed UTF-8: %1", v));
×
2037
        }
×
2038
        else {
960✔
2039
            m_ucase = std::move(*upper);
960✔
2040
            m_lcase = std::move(*lower);
960✔
2041
        }
960✔
2042
    }
960✔
2043

2044
    void _search_index_init() override;
2045

2046
    std::string describe_condition() const override
2047
    {
12✔
2048
        return EqualIns::description();
12✔
2049
    }
12✔
2050

2051
    std::unique_ptr<ParentNode> clone() const override
2052
    {
930✔
2053
        return std::unique_ptr<ParentNode>(new StringNode(*this));
930✔
2054
    }
930✔
2055

2056
    StringNode(const StringNode& from)
2057
        : StringNodeEqualBase(from)
465✔
2058
        , m_ucase(from.m_ucase)
465✔
2059
        , m_lcase(from.m_lcase)
465✔
2060
    {
930✔
2061
    }
930✔
2062

2063
private:
2064
    std::vector<ObjKey> m_index_matches;
2065
    std::string m_ucase;
2066
    std::string m_lcase;
2067
    std::vector<ObjKey> storage;
2068
    size_t _find_first_local(size_t start, size_t end) override;
2069
};
2070

2071

2072
class StringNodeFulltext : public StringNodeEqualBase {
2073
public:
2074
    StringNodeFulltext(StringData v, ColKey column, std::unique_ptr<LinkMap> lm = {});
2075

2076
    void table_changed() override;
2077

2078
    void _search_index_init() override;
2079

2080
    bool has_search_index() const override
2081
    {
378✔
2082
        return true; // it's a required precondition for fulltext queries
378✔
2083
    }
378✔
2084

2085
    std::unique_ptr<ParentNode> clone() const override
2086
    {
468✔
2087
        return std::unique_ptr<ParentNode>(new StringNodeFulltext(*this));
468✔
2088
    }
468✔
2089

2090
    std::string describe_condition() const override
2091
    {
×
2092
        return "FULLTEXT";
×
2093
    }
×
2094

2095
private:
2096
    std::vector<ObjKey> m_index_matches;
2097
    std::unique_ptr<LinkMap> m_link_map;
2098
    StringNodeFulltext(const StringNodeFulltext&);
2099

2100
    size_t _find_first_local(size_t, size_t) override
2101
    {
×
2102
        REALM_UNREACHABLE();
2103
    }
×
2104
};
2105

2106
// OR node contains at least two node pointers: Two or more conditions to OR
2107
// together in m_conditions, and the next AND condition (if any) in m_child.
2108
//
2109
// For 'second.equal(23).begin_group().first.equal(111).Or().first.equal(222).end_group().third().equal(555)', this
2110
// will first set m_conditions[0] = left-hand-side through constructor, and then later, when .first.equal(222) is
2111
// invoked, invocation will set m_conditions[1] = right-hand-side through Query& Query::Or() (see query.cpp).
2112
// In there, m_child is also set to next AND condition (if any exists) following the OR.
2113
class OrNode : public ParentNode {
2114
public:
2115
    OrNode(std::unique_ptr<ParentNode> condition)
2116
    {
27,102✔
2117
        m_dT = 50.0;
27,102✔
2118
        if (condition)
27,102✔
2119
            m_conditions.emplace_back(std::move(condition));
26,199✔
2120
    }
27,102✔
2121

2122
    OrNode(const OrNode& other)
2123
        : ParentNode(other)
14,313✔
2124
    {
28,329✔
2125
        for (const auto& condition : other.m_conditions) {
732,054✔
2126
            m_conditions.emplace_back(condition->clone());
732,054✔
2127
        }
732,054✔
2128
    }
28,329✔
2129

2130
    void table_changed() override
2131
    {
27,147✔
2132
        for (auto& condition : m_conditions) {
27,147✔
2133
            condition->set_table(m_table);
26,295✔
2134
        }
26,295✔
2135
    }
27,147✔
2136

2137
    void cluster_changed() override
2138
    {
64,872✔
2139
        for (auto& condition : m_conditions) {
339,981✔
2140
            condition->set_cluster(m_cluster);
339,981✔
2141
        }
339,981✔
2142

2143
        m_start.clear();
64,872✔
2144
        m_start.resize(m_conditions.size(), 0);
64,872✔
2145

2146
        m_last.clear();
64,872✔
2147
        m_last.resize(m_conditions.size(), 0);
64,872✔
2148

2149
        m_was_match.clear();
64,872✔
2150
        m_was_match.resize(m_conditions.size(), false);
64,872✔
2151
    }
64,872✔
2152

2153
    std::string describe(util::serializer::SerialisationState& state) const override
2154
    {
594✔
2155
        std::string s;
594✔
2156
        for (size_t i = 0; i < m_conditions.size(); ++i) {
3,006✔
2157
            if (m_conditions[i]) {
2,412✔
2158
                s += m_conditions[i]->describe_expression(state);
2,412✔
2159
                if (i != m_conditions.size() - 1) {
2,412✔
2160
                    s += " or ";
1,818✔
2161
                }
1,818✔
2162
            }
2,412✔
2163
        }
2,412✔
2164
        if (m_conditions.size() > 1) {
594✔
2165
            s = "(" + s + ")";
528✔
2166
        }
528✔
2167
        return s;
594✔
2168
    }
594✔
2169

2170
    void collect_dependencies(std::vector<TableKey>& versions) const override
2171
    {
25,110✔
2172
        for (const auto& cond : m_conditions) {
25,908✔
2173
            cond->collect_dependencies(versions);
25,908✔
2174
        }
25,908✔
2175
    }
25,110✔
2176

2177
    void init(bool will_query_ranges) override
2178
    {
27,192✔
2179
        ParentNode::init(will_query_ranges);
27,192✔
2180
        combine_conditions(!will_query_ranges);
27,192✔
2181

2182
        m_start.clear();
27,192✔
2183
        m_start.resize(m_conditions.size(), 0);
27,192✔
2184

2185
        m_last.clear();
27,192✔
2186
        m_last.resize(m_conditions.size(), 0);
27,192✔
2187

2188
        m_was_match.clear();
27,192✔
2189
        m_was_match.resize(m_conditions.size(), false);
27,192✔
2190

2191
        std::vector<ParentNode*> v;
27,192✔
2192
        for (auto& condition : m_conditions) {
277,035✔
2193
            condition->init(will_query_ranges);
277,035✔
2194
            v.clear();
277,035✔
2195
            condition->gather_children(v);
277,035✔
2196
        }
277,035✔
2197
    }
27,192✔
2198

2199
    size_t find_first_local(size_t start, size_t end) override
2200
    {
4,228,596✔
2201
        if (start >= end)
4,228,596✔
2202
            return not_found;
1,311✔
2203

2204
        size_t index = not_found;
4,227,285✔
2205

2206
        for (size_t c = 0; c < m_conditions.size(); ++c) {
17,736,171✔
2207
            // out of order search; have to discard cached results
2208
            if (start < m_start[c]) {
13,508,886✔
2209
                m_last[c] = 0;
×
2210
                m_was_match[c] = false;
×
2211
            }
×
2212
            // already searched this range and didn't match
2213
            else if (m_last[c] >= end)
13,508,886✔
2214
                continue;
3,593,832✔
2215
            // already search this range and *did* match
2216
            else if (m_was_match[c] && m_last[c] >= start) {
9,915,054✔
2217
                if (index > m_last[c])
4,978,086✔
2218
                    index = m_last[c];
1,340,667✔
2219
                continue;
4,978,086✔
2220
            }
4,978,086✔
2221

2222
            m_start[c] = start;
4,936,968✔
2223
            size_t fmax = std::max(m_last[c], start);
4,936,968✔
2224
            size_t f = m_conditions[c]->find_first(fmax, end);
4,936,968✔
2225
            m_was_match[c] = f != not_found;
4,936,968✔
2226
            m_last[c] = f == not_found ? end : f;
4,936,968✔
2227
            if (f != not_found && index > m_last[c])
4,936,968✔
2228
                index = m_last[c];
3,529,371✔
2229
        }
4,936,968✔
2230

2231
        return index;
4,227,285✔
2232
    }
4,228,596✔
2233

2234
    std::string validate() override
2235
    {
18✔
2236
        if (m_conditions.size() == 0)
18✔
2237
            return "Missing both arguments of OR";
6✔
2238
        if (m_conditions.size() == 1)
12✔
2239
            return "Missing argument of OR";
12✔
2240
        std::string s;
×
2241
        if (m_child != 0)
×
2242
            s = m_child->validate();
×
2243
        if (s != "")
×
2244
            return s;
×
2245
        for (size_t i = 0; i < m_conditions.size(); ++i) {
×
2246
            s = m_conditions[i]->validate();
×
2247
            if (s != "")
×
2248
                return s;
×
2249
        }
×
2250
        return "";
×
2251
    }
×
2252

2253
    std::unique_ptr<ParentNode> clone() const override
2254
    {
28,329✔
2255
        return std::unique_ptr<ParentNode>(new OrNode(*this));
28,329✔
2256
    }
28,329✔
2257

2258
    std::vector<std::unique_ptr<ParentNode>> m_conditions;
2259

2260
private:
2261
    struct ConditionType {
2262
        ConditionType(const ParentNode& node)
2263
            : m_col(node.m_condition_column_key.value)
9,018,777✔
2264
            , m_type(typeid(node))
9,018,777✔
2265
        {
10,426,692✔
2266
        }
10,426,692✔
2267
        int64_t m_col;
2268
        std::type_index m_type;
2269
        bool operator<(const ConditionType& other) const
2270
        {
5,559,825✔
2271
            return this->m_col < other.m_col && this->m_type < other.m_type;
5,559,825✔
2272
        }
5,559,825✔
2273
        bool operator!=(const ConditionType& other) const
2274
        {
247,047✔
2275
            return this->m_col != other.m_col || this->m_type != other.m_type;
247,047✔
2276
        }
247,047✔
2277
    };
2278

2279
    void combine_conditions(bool ignore_indexes)
2280
    {
27,192✔
2281
        // Although ColKey is not unique per table, it is not important to consider
2282
        // the table when sorting here because ParentNode::m_condition_column_key
2283
        // is only a valid ColKey when the node has a direct condition on a column
2284
        // of the table this query is running on. Any link query nodes use a special
2285
        // LinkChain state to store the column path.
2286
        std::sort(m_conditions.begin(), m_conditions.end(), [](auto& a, auto& b) {
5,089,008✔
2287
            return ConditionType(*a) < ConditionType(*b);
5,089,008✔
2288
        });
5,089,008✔
2289

2290
        bool compute_condition_counts = num_conditions_may_need_combination_counts(m_conditions.size());
27,192✔
2291
        util::FlatMap<ConditionType, size_t> condition_type_counts;
27,192✔
2292
        if (compute_condition_counts) {
27,192✔
2293
            for (auto it = m_conditions.begin(); it != m_conditions.end();) {
384✔
2294
                // no need to try to combine anything other than simple nodes that
2295
                // filter directly on a top-level column since the only nodes that
2296
                // support combinations are string/int/uuid/oid <Equal> types
2297
                if (!(*it)->m_condition_column_key) {
192✔
2298
                    ++it;
×
2299
                    continue;
×
2300
                }
×
2301
                ConditionType cur_type(*(*it));
192✔
2302
                auto next = std::upper_bound(it, m_conditions.end(), cur_type,
192✔
2303
                                             [](const ConditionType& a, const std::unique_ptr<ParentNode>& b) {
1,434✔
2304
                                                 return a < ConditionType(*b);
1,434✔
2305
                                             });
1,434✔
2306
                condition_type_counts[cur_type] = next - it;
192✔
2307
                it = next;
192✔
2308
            }
192✔
2309
        }
192✔
2310

2311
        ParentNode* prev = m_conditions.begin()->get();
27,192✔
2312
        std::optional<size_t> cur_type_count;
27,192✔
2313
        if (compute_condition_counts) {
27,192✔
2314
            cur_type_count = condition_type_counts[ConditionType{*prev}];
192✔
2315
        }
192✔
2316
        auto cond = [&](auto& node) {
708,552✔
2317
            if (prev->consume_condition(*node, ignore_indexes, cur_type_count))
708,552✔
2318
                return true;
458,709✔
2319
            prev = &*node;
249,843✔
2320
            if (compute_condition_counts) {
249,843✔
2321
                cur_type_count = condition_type_counts[ConditionType{*prev}];
246,855✔
2322
            }
246,855✔
2323
            return false;
249,843✔
2324
        };
708,552✔
2325
        m_conditions.erase(std::remove_if(m_conditions.begin() + 1, m_conditions.end(), cond), m_conditions.end());
27,192✔
2326
    }
27,192✔
2327

2328
    // start index of the last find for each cond
2329
    std::vector<size_t> m_start;
2330
    // last looked at index of the last find for each cond
2331
    // is a matching index if m_was_match is true
2332
    std::vector<size_t> m_last;
2333
    std::vector<bool> m_was_match;
2334
};
2335

2336

2337
class NotNode : public ParentNode {
2338
public:
2339
    NotNode(std::unique_ptr<ParentNode> condition)
2340
        : m_condition(std::move(condition))
1,101✔
2341
    {
2,202✔
2342
        m_dT = 50.0;
2,202✔
2343
        if (!m_condition) {
2,202✔
2344
            throw query_parser::InvalidQueryError("Missing argument to Not");
×
2345
        }
×
2346
    }
2,202✔
2347

2348
    void table_changed() override
2349
    {
2,244✔
2350
        m_condition->set_table(m_table);
2,244✔
2351
    }
2,244✔
2352

2353
    void cluster_changed() override
2354
    {
2,730✔
2355
        m_condition->set_cluster(m_cluster);
2,730✔
2356
        // Heuristics bookkeeping:
2357
        m_known_range_start = 0;
2,730✔
2358
        m_known_range_end = 0;
2,730✔
2359
        m_first_in_known_range = not_found;
2,730✔
2360
    }
2,730✔
2361

2362
    void init(bool will_query_ranges) override
2363
    {
2,280✔
2364
        ParentNode::init(will_query_ranges);
2,280✔
2365
        std::vector<ParentNode*> v;
2,280✔
2366

2367
        m_condition->init(false);
2,280✔
2368
        v.clear();
2,280✔
2369
        m_condition->gather_children(v);
2,280✔
2370
    }
2,280✔
2371

2372
    size_t find_first_local(size_t start, size_t end) override;
2373

2374
    std::string describe(util::serializer::SerialisationState& state) const override
2375
    {
924✔
2376
        if (m_condition) {
924✔
2377
            return "!(" + m_condition->describe_expression(state) + ")";
924✔
2378
        }
924✔
2379
        return "!()";
×
2380
    }
924✔
2381

2382
    void collect_dependencies(std::vector<TableKey>& versions) const override
2383
    {
48✔
2384
        if (m_condition) {
48✔
2385
            m_condition->collect_dependencies(versions);
48✔
2386
        }
48✔
2387
    }
48✔
2388

2389
    std::unique_ptr<ParentNode> clone() const override
2390
    {
4,080✔
2391
        return std::unique_ptr<ParentNode>(new NotNode(*this));
4,080✔
2392
    }
4,080✔
2393

2394
    NotNode(const NotNode& from)
2395
        : ParentNode(from)
2,040✔
2396
        , m_condition(from.m_condition ? from.m_condition->clone() : nullptr)
2,040✔
2397
        , m_known_range_start(from.m_known_range_start)
2,040✔
2398
        , m_known_range_end(from.m_known_range_end)
2,040✔
2399
        , m_first_in_known_range(from.m_first_in_known_range)
2,040✔
2400
    {
4,080✔
2401
    }
4,080✔
2402

2403
    std::unique_ptr<ParentNode> m_condition;
2404

2405
private:
2406
    // FIXME This heuristic might as well be reused for all condition nodes.
2407
    size_t m_known_range_start;
2408
    size_t m_known_range_end;
2409
    size_t m_first_in_known_range;
2410

2411
    bool evaluate_at(size_t rowndx);
2412
    void update_known(size_t start, size_t end, size_t first);
2413
    size_t find_first_loop(size_t start, size_t end);
2414
    size_t find_first_covers_known(size_t start, size_t end);
2415
    size_t find_first_covered_by_known(size_t start, size_t end);
2416
    size_t find_first_overlap_lower(size_t start, size_t end);
2417
    size_t find_first_overlap_upper(size_t start, size_t end);
2418
    size_t find_first_no_overlap(size_t start, size_t end);
2419
};
2420

2421
// Compare two columns with eachother row-by-row
2422
class TwoColumnsNodeBase : public ParentNode {
2423
public:
2424
    TwoColumnsNodeBase(ColKey column1, ColKey column2)
2425
    {
23,568✔
2426
        m_dT = 100.0;
23,568✔
2427
        m_condition_column_key1 = column1;
23,568✔
2428
        m_condition_column_key2 = column2;
23,568✔
2429
        if (m_condition_column_key1.is_collection() || m_condition_column_key2.is_collection()) {
23,568✔
2430
            throw Exception(ErrorCodes::InvalidQuery,
×
2431
                            util::format("queries comparing two properties are not yet supported for "
×
2432
                                         "collections (list/set/dictionary) (%1 and %2)",
×
2433
                                         ParentNode::m_table->get_column_name(m_condition_column_key1),
×
2434
                                         ParentNode::m_table->get_column_name(m_condition_column_key2)));
×
2435
        }
×
2436
    }
23,568✔
2437

2438
    void table_changed() override
2439
    {
26,808✔
2440
        if (m_table) {
26,808✔
2441
            ParentNode::m_table->check_column(m_condition_column_key1);
26,808✔
2442
            ParentNode::m_table->check_column(m_condition_column_key2);
26,808✔
2443
        }
26,808✔
2444
    }
26,808✔
2445

2446
    static std::unique_ptr<ArrayPayload> update_cached_leaf_pointers_for_column(Allocator& alloc,
2447
                                                                                const ColKey& col_key);
2448
    void cluster_changed() override
2449
    {
30,933✔
2450
        if (!m_leaf1) {
30,933✔
2451
            m_leaf1 =
29,124✔
2452
                update_cached_leaf_pointers_for_column(m_table.unchecked_ptr()->get_alloc(), m_condition_column_key1);
29,124✔
2453
        }
29,124✔
2454
        if (!m_leaf2) {
30,933✔
2455
            m_leaf2 =
29,124✔
2456
                update_cached_leaf_pointers_for_column(m_table.unchecked_ptr()->get_alloc(), m_condition_column_key2);
29,124✔
2457
        }
29,124✔
2458
        m_cluster->init_leaf(m_condition_column_key1, m_leaf1.get());
30,933✔
2459
        m_cluster->init_leaf(m_condition_column_key2, m_leaf2.get());
30,933✔
2460
    }
30,933✔
2461

2462
    std::string describe(util::serializer::SerialisationState& state) const override
2463
    {
×
2464
        REALM_ASSERT(m_condition_column_key1 && m_condition_column_key2);
×
2465
        return state.describe_column(ParentNode::m_table, m_condition_column_key1) + " " + describe_condition() +
×
2466
               " " + state.describe_column(ParentNode::m_table, m_condition_column_key2);
×
2467
    }
×
2468

2469
    TwoColumnsNodeBase(const TwoColumnsNodeBase& from)
2470
        : ParentNode(from)
10,914✔
2471
        , m_condition_column_key1(from.m_condition_column_key1)
10,914✔
2472
        , m_condition_column_key2(from.m_condition_column_key2)
10,914✔
2473
    {
21,828✔
2474
    }
21,828✔
2475

2476
protected:
2477
    ColKey m_condition_column_key1;
2478
    ColKey m_condition_column_key2;
2479
    std::unique_ptr<ArrayPayload> m_leaf1;
2480
    std::unique_ptr<ArrayPayload> m_leaf2;
2481
};
2482

2483

2484
template <class TConditionFunction>
2485
class TwoColumnsNode : public TwoColumnsNodeBase {
2486
public:
2487
    using TwoColumnsNodeBase::TwoColumnsNodeBase;
2488
    size_t find_first_local(size_t start, size_t end) override
2489
    {
292,644✔
2490
        size_t s = start;
292,644✔
2491
        while (s < end) {
772,830✔
2492
            QueryValue v1(m_leaf1->get_any(s));
730,242✔
2493
            QueryValue v2(m_leaf2->get_any(s));
730,242✔
2494
            if (TConditionFunction()(v1, v2))
730,242✔
2495
                return s;
250,056✔
2496
            else
480,186✔
2497
                s++;
480,186✔
2498
        }
730,242✔
2499
        return not_found;
42,588✔
2500
    }
292,644✔
2501

2502
    std::string describe_condition() const override
2503
    {
×
2504
        return TConditionFunction::description();
×
2505
    }
×
2506

2507
    std::unique_ptr<ParentNode> clone() const override
2508
    {
21,828✔
2509
        return std::unique_ptr<ParentNode>(new TwoColumnsNode<TConditionFunction>(*this));
21,828✔
2510
    }
21,828✔
2511
};
2512

2513

2514
// For Next-Generation expressions like col1 / col2 + 123 > col4 * 100.
2515
class ExpressionNode : public ParentNode {
2516
public:
2517
    ExpressionNode(std::unique_ptr<Expression>);
2518

2519
    void init(bool) override;
2520
    size_t find_first_local(size_t start, size_t end) override;
2521

2522
    void table_changed() override;
2523
    void cluster_changed() override;
2524
    void collect_dependencies(std::vector<TableKey>&) const override;
2525

2526
    std::string describe(util::serializer::SerialisationState& state) const override;
2527

2528
    std::unique_ptr<ParentNode> clone() const override;
2529

2530
private:
2531
    ExpressionNode(const ExpressionNode& from);
2532

2533
    std::unique_ptr<Expression> m_expression;
2534
};
2535

2536

2537
class LinksToNodeBase : public ParentNode {
2538
public:
2539
    LinksToNodeBase(ColKey origin_column_key, ObjKey target_key)
2540
        : LinksToNodeBase(origin_column_key, std::vector<ObjKey>{target_key})
6,810✔
2541
    {
13,620✔
2542
    }
13,620✔
2543

2544
    LinksToNodeBase(ColKey origin_column_key, const std::vector<ObjKey>& target_keys)
2545
        : m_target_keys(target_keys)
7,044✔
2546
    {
14,088✔
2547
        m_dT = 50.0;
14,088✔
2548
        m_condition_column_key = origin_column_key;
14,088✔
2549
        auto column_type = origin_column_key.get_type();
14,088✔
2550
        REALM_ASSERT(column_type == col_type_Link);
14,088✔
2551
        REALM_ASSERT(!m_target_keys.empty());
14,088✔
2552
    }
14,088✔
2553

2554
    void cluster_changed() override
2555
    {
50,928✔
2556
        if (m_condition_column_key.is_collection()) {
50,928✔
2557
            m_linklist.emplace(m_table.unchecked_ptr()->get_alloc());
24,924✔
2558
            m_leaf = &*m_linklist;
24,924✔
2559
        }
24,924✔
2560
        else {
26,004✔
2561
            m_list.emplace(m_table.unchecked_ptr()->get_alloc());
26,004✔
2562
            m_leaf = &*m_list;
26,004✔
2563
        }
26,004✔
2564
        m_cluster->init_leaf(this->m_condition_column_key, m_leaf);
50,928✔
2565
    }
50,928✔
2566

2567
    std::string describe(util::serializer::SerialisationState& state) const override
2568
    {
252✔
2569
        REALM_ASSERT(m_condition_column_key);
252✔
2570
        std::string links = m_target_keys.size() > 1 ? "{" : "";
252✔
2571
        Group* g = m_table->get_parent_group();
252✔
2572
        auto target_table_key = m_table->get_opposite_table(m_condition_column_key)->get_key();
252✔
2573
        int cnt = 0;
252✔
2574
        for (auto key : m_target_keys) {
264✔
2575
            if (cnt++) {
264✔
2576
                links += ",";
12✔
2577
            }
12✔
2578
            links += util::serializer::print_value(ObjLink(target_table_key, key), g);
264✔
2579
        }
264✔
2580
        if (m_target_keys.size() > 1) {
252✔
2581
            links += "}";
12✔
2582
        }
12✔
2583
        return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " + describe_condition() + " " +
252✔
2584
               links;
252✔
2585
    }
252✔
2586

2587
protected:
2588
    std::vector<ObjKey> m_target_keys;
2589
    std::optional<ArrayKey> m_list;
2590
    std::optional<ArrayList> m_linklist;
2591
    ArrayPayload* m_leaf = nullptr;
2592

2593
    LinksToNodeBase(const LinksToNodeBase& source)
2594
        : ParentNode(source)
7,686✔
2595
        , m_target_keys(source.m_target_keys)
7,686✔
2596
    {
15,372✔
2597
    }
15,372✔
2598

2599
    ref_type get_ref(size_t i)
2600
    {
6,025,644✔
2601
        if (m_list)
6,025,644✔
2602
            return m_list->get_as_ref(i);
×
2603
        return m_linklist->get(i);
6,025,644✔
2604
    }
6,025,644✔
2605
};
2606

2607
template <class TConditionFunction>
2608
class LinksToNode : public LinksToNodeBase {
2609
public:
2610
    using LinksToNodeBase::LinksToNodeBase;
2611

2612
    std::string describe_condition() const override
2613
    {
252✔
2614
        return TConditionFunction::description();
252✔
2615
    }
252✔
2616

2617
    std::unique_ptr<ParentNode> clone() const override
2618
    {
15,372✔
2619
        return std::unique_ptr<ParentNode>(new LinksToNode<TConditionFunction>(*this));
15,372✔
2620
    }
15,372✔
2621

2622
    size_t find_first_local(size_t start, size_t end) override;
2623
};
2624

2625
} // namespace realm
2626

2627
#endif // REALM_QUERY_ENGINE_HPP
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc