• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / 2109

07 Mar 2024 01:56PM UTC coverage: 90.918% (+0.01%) from 90.908%
2109

push

Evergreen

web-flow
Fix querying with a path into nested collections with wildcards (#7404)

Comparing a collection with a list could fail if there were wildcards
in the path and therefore multiple collections to compare with the
right-hand list.

A link list implicitly acts as a wildcard in the path, so if a link list
is in the path there will be a similar problem.  Do not merge values
from different objects into a common list in queries.

93972 of 173176 branches covered (54.26%)

323 of 332 new or added lines in 6 files covered. (97.29%)

91 existing lines in 18 files now uncovered.

238503 of 262328 relevant lines covered (90.92%)

6065347.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

94.63
/test/test_index_string.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include "testsettings.hpp"
20
#ifdef TEST_INDEX_STRING
21

22
#include <realm.hpp>
23
#include <realm/index_string.hpp>
24
#include <realm/query_expression.hpp>
25
#include <realm/tokenizer.hpp>
26
#include <realm/util/to_string.hpp>
27
#include <set>
28
#include "test.hpp"
29
#include "util/misc.hpp"
30
#include "util/random.hpp"
31

32
using namespace realm;
33
using namespace util;
34
using namespace realm;
35
using namespace realm::util;
36
using namespace realm::test_util;
37
using unit_test::TestContext;
38

39
// Test independence and thread-safety
40
// -----------------------------------
41
//
42
// All tests must be thread safe and independent of each other. This
43
// is required because it allows for both shuffling of the execution
44
// order and for parallelized testing.
45
//
46
// In particular, avoid using std::rand() since it is not guaranteed
47
// to be thread safe. Instead use the API offered in
48
// `test/util/random.hpp`.
49
//
50
// All files created in tests must use the TEST_PATH macro (or one of
51
// its friends) to obtain a suitable file system path. See
52
// `test/util/test_path.hpp`.
53
//
54
//
55
// Debugging and the ONLY() macro
56
// ------------------------------
57
//
58
// A simple way of disabling all tests except one called `Foo`, is to
59
// replace TEST(Foo) with ONLY(Foo) and then recompile and rerun the
60
// test suite. Note that you can also use filtering by setting the
61
// environment variable `UNITTEST_FILTER`. See `README.md` for more on
62
// this.
63
//
64
// Another way to debug a particular test, is to copy that test into
65
// `experiments/testcase.cpp` and then run `sh build.sh
66
// check-testcase` (or one of its friends) from the command line.
67

68

69
namespace {
70

71
template <typename T>
72
class column {
73
public:
74
    class ColumnTestType {
75
    public:
76
        ColumnTestType(column* owner)
77
            : m_owner(owner)
78
        {
176✔
79
        }
176✔
80
        const SearchIndex* create_search_index()
81
        {
178✔
82
            m_owner->m_table.add_search_index(m_owner->m_col_key);
178✔
83
            return m_owner->m_table.get_search_index(m_owner->m_col_key);
178✔
84
        }
178✔
85
        ObjKey key(size_t ndx) const
86
        {
176✔
87
            return m_keys[ndx];
176✔
88
        }
176✔
89
        size_t size() const
90
        {
294✔
91
            return m_keys.size();
294✔
92
        }
294✔
93
        void add(T value)
94
        {
79,633✔
95
            auto k = m_owner->m_table.create_object().set(m_owner->m_col_key, value).get_key();
79,633✔
96
            m_keys.push_back(k);
79,633✔
97
        }
79,633✔
98
        void add_null()
99
        {
2✔
100
            auto k = m_owner->m_table.create_object().set_null(m_owner->m_col_key).get_key();
2✔
101
            m_keys.push_back(k);
2✔
102
        }
2✔
103
        void set(size_t ndx, T value)
104
        {
44✔
105
            m_owner->m_table.get_object(m_keys[ndx]).set(m_owner->m_col_key, value);
44✔
106
        }
44✔
107
        void insert(size_t ndx, T value)
108
        {
10✔
109
            auto k = m_owner->m_table.create_object().set(m_owner->m_col_key, value).get_key();
10✔
110
            m_keys.insert(m_keys.begin() + ndx, k);
10✔
111
        }
10✔
112
        T get(size_t ndx)
113
        {
2,195,543✔
114
            return m_owner->m_table.get_object(m_keys[ndx]).template get<T>(m_owner->m_col_key);
2,195,543✔
115
        }
2,195,543✔
116
        T get(ObjKey obj_key)
117
        {
152✔
118
            return m_owner->m_table.get_object(obj_key).template get<T>(m_owner->m_col_key);
152✔
119
        }
152✔
120
        void erase(size_t ndx)
121
        {
186✔
122
            m_owner->m_table.remove_object(m_keys[ndx]);
186✔
123
            m_keys.erase(m_keys.begin() + ndx);
186✔
124
        }
186✔
125
        void clear()
126
        {
162✔
127
            m_owner->m_table.clear();
162✔
128
            m_keys.clear();
162✔
129
        }
162✔
130
        size_t find_first(T value) const
131
        {
2,824✔
132
            auto k = m_owner->m_table.find_first(m_owner->m_col_key, value);
2,824✔
133
            if (k == realm::null_key) {
2,824✔
134
                return realm::npos;
1,301✔
135
            }
1,301✔
136
            auto it = std::find(m_keys.begin(), m_keys.end(), k);
1,523✔
137
            return it - m_keys.begin();
1,523✔
138
        }
1,523✔
139
        size_t count(T value) const
140
        {
160✔
141
            return m_owner->m_table.count_string(m_owner->m_col_key, value);
160✔
142
        }
160✔
143
        void verify()
144
        {
24✔
145
            m_owner->m_table.verify();
24✔
146
        }
24✔
147

148
    private:
149
        column* m_owner;
150
        std::vector<ObjKey> m_keys;
151
    };
152

153
    column(bool nullable = false, bool enumerated = false)
154
        : m_column(this)
155
    {
176✔
156
        m_col_key = m_table.add_column(ColumnTypeTraits<T>::id, "values", nullable);
176✔
157
        if (enumerated) {
176✔
158
            m_table.enumerate_string_column(m_col_key);
78✔
159
        }
78✔
160
    }
176✔
161
    ColumnTestType& get_column()
162
    {
176✔
163
        return m_column;
176✔
164
    }
176✔
165

166
private:
167
    Table m_table;
168
    ColKey m_col_key;
169
    ColumnTestType m_column;
170
};
171

172
// Fixture variant: plain string column (constructed with nullable = false,
// enumerated = false).
class string_column : public column<String> {
public:
    string_column()
        : column<String>(/* nullable */ false, /* enumerated */ false)
    {
    }

    // Mirrors the `nullable` flag this fixture passes to its base.
    static bool is_nullable()
    {
        const bool nullable_flag = false;
        return nullable_flag;
    }

    // Mirrors the `enumerated` flag this fixture passes to its base.
    static bool is_enumerated()
    {
        const bool enumerated_flag = false;
        return enumerated_flag;
    }
};
187
// Fixture variant: string column constructed with nullable = true,
// enumerated = false.
class nullable_string_column : public column<String> {
public:
    nullable_string_column()
        : column<String>(/* nullable */ true, /* enumerated */ false)
    {
    }

    // Mirrors the `nullable` flag this fixture passes to its base.
    static bool is_nullable()
    {
        const bool nullable_flag = true;
        return nullable_flag;
    }

    // Mirrors the `enumerated` flag this fixture passes to its base.
    static bool is_enumerated()
    {
        const bool enumerated_flag = false;
        return enumerated_flag;
    }
};
202
// Fixture variant: string column constructed with nullable = false,
// enumerated = true.
class enum_column : public column<String> {
public:
    enum_column()
        : column<String>(/* nullable */ false, /* enumerated */ true)
    {
    }

    // Mirrors the `nullable` flag this fixture passes to its base.
    static bool is_nullable()
    {
        const bool nullable_flag = false;
        return nullable_flag;
    }

    // Mirrors the `enumerated` flag this fixture passes to its base.
    static bool is_enumerated()
    {
        const bool enumerated_flag = true;
        return enumerated_flag;
    }
};
217
// Fixture variant: string column constructed with nullable = true,
// enumerated = true.
class nullable_enum_column : public column<String> {
public:
    nullable_enum_column()
        : column<String>(/* nullable */ true, /* enumerated */ true)
    {
    }

    // Mirrors the `nullable` flag this fixture passes to its base.
    static bool is_nullable()
    {
        const bool nullable_flag = true;
        return nullable_flag;
    }

    // Mirrors the `enumerated` flag this fixture passes to its base.
    static bool is_enumerated()
    {
        const bool enumerated_flag = true;
        return enumerated_flag;
    }
};
232

233
// disabled to avoid warnings about not being used - enable when the tests
234
// needing them are enabled again
235

236
// strings used by tests
237
// Names used as values by the string index tests. s1, s5 and s6 start with
// the same characters ("John"), as do s3 and s7 ("Sam").
const char s1[] = "John";
const char s2[] = "Brian";
const char s3[] = "Samantha";
const char s4[] = "Tom";
const char s5[] = "Johnathan";
const char s6[] = "Johnny";
const char s7[] = "Sam";

// Values used by the integer index tests. 0x11112227 is listed twice, so the
// set contains exactly one duplicated value.
std::vector<int64_t> ints = {
    0x1111,
    0x11112222,
    0x11113333,
    0x1111333,
    0x111122223333ull,
    0x1111222233334ull,
    0x22223333,
    0x11112227,
    0x11112227,
    0x78923,
};

// Tag types for selecting nullable / non-nullable variants.
using nullable = std::true_type;
using non_nullable = std::false_type;
251

252
} // anonymous namespace
253

254
// Exercises the tokenizer: distinct-token extraction, per-token positions and
// byte ranges, a non-ASCII token, and hyphen handling.
TEST(Tokenizer_Basic)
{
    auto tokenizer = realm::Tokenizer::get_instance();

    // Repeated words are reported once.
    tokenizer->reset("to be or not to be");
    CHECK_EQUAL(tokenizer->get_all_tokens().size(), 4);

    // Token info for "to": it is found for both "To" and "to", with the
    // token positions and character ranges of each occurrence.
    tokenizer->reset("To be or not to be");
    realm::TokenInfoMap english_info = tokenizer->get_token_info();
    CHECK_EQUAL(english_info.size(), 4);
    realm::TokenInfo& to_info = english_info["to"];
    CHECK_EQUAL(to_info.positions.size(), 2);
    CHECK_EQUAL(to_info.positions[0], 0);
    CHECK_EQUAL(to_info.positions[1], 4);
    CHECK_EQUAL(to_info.ranges.size(), 2);
    CHECK_EQUAL(to_info.ranges[0].first, 0);
    CHECK_EQUAL(to_info.ranges[0].second, 2);
    CHECK_EQUAL(to_info.ranges[1].first, 13);
    CHECK_EQUAL(to_info.ranges[1].second, 15);

    // A token containing a non-ASCII character; the expected range is three
    // units wide for the two-character word.
    tokenizer->reset("Jeg gik mig over sø og land");
    realm::TokenInfoMap danish_info = tokenizer->get_token_info();
    CHECK_EQUAL(danish_info.size(), 7);
    realm::TokenInfo& soe_info = danish_info["sø"];
    CHECK_EQUAL(soe_info.ranges[0].first, 17);
    CHECK_EQUAL(soe_info.ranges[0].second, 20);

    // Hyphens split words and leading hyphens are not part of the token.
    tokenizer->reset("with-hyphen -term -other-term-plus");
    const std::set<std::string> expected_tokens({"with", "hyphen", "term", "other", "plus"});
    CHECK(tokenizer->get_all_tokens() == expected_tokens);
}
2✔
285

286
// Adding a search index to a link, link list, double, float or binary column
// must be rejected with ErrorCodes::IllegalOperation.
TEST(StringIndex_NonIndexable)
{
    Group group;
    TableRef origin = group.add_table("table");
    TableRef target = group.add_table("target");

    // One column of each kind under test.
    origin->add_column(*target, "link");
    origin->add_column_list(*target, "linkList");
    origin->add_column(type_Double, "double");
    origin->add_column(type_Float, "float");
    origin->add_column(type_Binary, "binary");

    for (auto col_key : origin->get_column_keys()) {
        CHECK_LOGIC_ERROR(origin->add_search_index(col_key), ErrorCodes::IllegalOperation);
    }
}
2✔
302

303
// Builds an index over existing values and checks that find_first() returns
// the key of the first object holding each value.
TEST_TYPES(StringIndex_BuildIndex, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    col.add(s1);
    col.add(s2);
    col.add(s3);
    col.add(s4);
    col.add(s1); // duplicate value
    col.add(s5); // common prefix
    col.add(s6); // common prefix

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    const ObjKey r1 = ndx.find_first(s1);
    const ObjKey r2 = ndx.find_first(s2);
    const ObjKey r3 = ndx.find_first(s3);
    const ObjKey r4 = ndx.find_first(s4);
    const ObjKey r5 = ndx.find_first(s5);
    const ObjKey r6 = ndx.find_first(s6);

    // Compare against the keys that were actually handed out instead of
    // assuming particular raw key values (same approach as StringIndex_Clear).
    // Position 4 is the duplicate of s1 and is therefore not a first match.
    CHECK_EQUAL(col.key(0), r1);
    CHECK_EQUAL(col.key(1), r2);
    CHECK_EQUAL(col.key(2), r3);
    CHECK_EQUAL(col.key(3), r4);
    CHECK_EQUAL(col.key(5), r5);
    CHECK_EQUAL(col.key(6), r6);
}
8✔
333

334
// Erasing every object, back to front and then front to back, must leave the
// index empty each time.
TEST_TYPES(StringIndex_DeleteAll, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    auto& col = test_resources.get_column();

    // s1 is present twice (duplicate value); s5 and s6 share a prefix with s1.
    const char* const values[] = {s1, s2, s3, s4, s1, s5, s6};
    const size_t num_values = sizeof(values) / sizeof(values[0]);

    for (const char* value : values)
        col.add(value);

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    // Delete all entries, last position first (reverse order to avoid ref updates)
    for (size_t remaining = num_values; remaining > 0; --remaining)
        col.erase(remaining - 1);
    CHECK(ndx.is_empty());

    // Re-insert values
    for (const char* value : values)
        col.add(value);

    // Delete all entries, always taking the first position
    // (in order to force constant ref updating)
    for (size_t n = 0; n < num_values; ++n)
        col.erase(0);
    CHECK(ndx.is_empty());
}
8✔
381

382
// Erases single objects and checks after each step which values can still be
// found and at which position.
TEST_TYPES(StringIndex_Delete, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    auto& col = test_resources.get_column();

    // Positions: 0 = s1, 1 = s2, 2 = s3, 3 = s4, 4 = s1 (duplicate value)
    const char* const initial[] = {s1, s2, s3, s4, s1};
    for (const char* value : initial)
        col.add(value);

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    // Delete first item (in index): s2 goes away, later positions shift down
    col.erase(1);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(1, col.find_first(s3));
    CHECK_EQUAL(2, col.find_first(s4));
    CHECK_EQUAL(null_key, ndx.find_first(s2));

    // Delete last item (in index): s4 goes away
    col.erase(2);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(1, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s4));
    CHECK_EQUAL(not_found, col.find_first(s2));

    // Delete middle item (in index): s3 goes away
    col.erase(1);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(not_found, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s4));
    CHECK_EQUAL(not_found, col.find_first(s2));

    // Delete all items: the two remaining objects both hold s1
    for (int n = 0; n < 2; ++n)
        col.erase(0);
    CHECK(ndx.is_empty());
}
8✔
425

426

427
// Clearing a column that never held any values must leave its index empty.
TEST_TYPES(StringIndex_ClearEmpty, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    auto& col = test_resources.get_column();

    // Index is created on the still-empty column
    const SearchIndex* const index = col.create_search_index();

    // Clear to remove all entries (there are none)
    col.clear();
    CHECK(index->is_empty());
}
8✔
439

440
// Clearing a populated column empties the index, and values added afterwards
// are indexed again.
TEST_TYPES(StringIndex_Clear, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    auto& col = test_resources.get_column();

    // s1 is present twice (duplicate value); s5 and s6 share a prefix with s1.
    const char* const values[] = {s1, s2, s3, s4, s1, s5, s6};

    for (const char* value : values)
        col.add(value);

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    // Clear to remove all entries
    col.clear();
    CHECK(ndx.is_empty());

    // Re-insert values
    for (const char* value : values)
        col.add(value);

    // Each value must resolve to the key of the first object holding it.
    // Position 4 is the second s1 and is therefore never a first match.
    CHECK_EQUAL(col.key(0), ndx.find_first(s1));
    CHECK_EQUAL(col.key(1), ndx.find_first(s2));
    CHECK_EQUAL(col.key(2), ndx.find_first(s3));
    CHECK_EQUAL(col.key(3), ndx.find_first(s4));
    CHECK_EQUAL(col.key(5), ndx.find_first(s5));
    CHECK_EQUAL(col.key(6), ndx.find_first(s6));
}
8✔
483

484

485
// Overwrites values at the first, last and a middle position and checks that
// lookups reflect every change.
TEST_TYPES(StringIndex_Set, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    auto& col = test_resources.get_column();

    // Positions: 0 = s1, 1 = s2, 2 = s3, 3 = s4, 4 = s1 (duplicate value)
    const char* const initial[] = {s1, s2, s3, s4, s1};
    for (const char* value : initial)
        col.add(value);

    // Create a new index on column
    col.create_search_index();

    // Set top value: the only s1 left is the one at position 4
    col.set(0, s5);

    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s3));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s1));

    // Set bottom value: s1 is gone completely
    col.set(4, s6);

    CHECK_EQUAL(not_found, col.find_first(s1));
    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s3));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s6));

    // Set middle value: s3 is replaced by s7
    col.set(2, s7);

    CHECK_EQUAL(not_found, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s1));
    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s7));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s6));
}
8✔
529

530
// Counts occurrences of values that are present zero to four times.
TEST_TYPES(StringIndex_Count, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    auto& col = test_resources.get_column();

    // s1 is added once, s2 twice, s3 three times and s4 four times.
    const char* const values[] = {s1, s2, s3, s4};
    for (size_t which = 0; which < 4; ++which) {
        for (size_t repeat = 0; repeat <= which; ++repeat)
            col.add(values[which]);
    }

    // Create a new index on column
    col.create_search_index();

    // Counts (s5 was never added)
    CHECK_EQUAL(0, col.count(s5));
    CHECK_EQUAL(1, col.count(s1));
    CHECK_EQUAL(2, col.count(s2));
    CHECK_EQUAL(3, col.count(s3));
    CHECK_EQUAL(4, col.count(s4));
}
8✔
561

562
// An index built over a column with repeated values must report duplicates.
TEST_TYPES(StringIndex_Distinct, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    auto& col = test_resources.get_column();

    // s1 is added once, s2 twice, s3 three times and s4 four times.
    const char* const values[] = {s1, s2, s3, s4};
    for (size_t which = 0; which < 4; ++which) {
        for (size_t repeat = 0; repeat <= which; ++repeat)
            col.add(values[which]);
    }

    // Create a new index on column
    const SearchIndex& index = *col.create_search_index();
    CHECK(index.has_duplicate_values());
}
8✔
582

583
// Checks the three outcomes of find_all_no_copy(): not found, a single match
// and a list of matches.
TEST_TYPES(StringIndex_FindAllNoCopy, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    auto& col = test_resources.get_column();

    // s1 is added once, s2 twice, s3 three times and s4 four times, so the
    // four s4 objects end up at positions 6..9.
    const char* const values[] = {s1, s2, s3, s4};
    for (size_t which = 0; which < 4; ++which) {
        for (size_t repeat = 0; repeat <= which; ++repeat)
            col.add(values[which]);
    }

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    InternalFindResult found;

    // A value that was never added
    const FindRes missing = ndx.find_all_no_copy(StringData("not there"), found);
    CHECK_EQUAL(FindRes_not_found, missing);

    // A value present exactly once
    const FindRes single = ndx.find_all_no_copy(s1, found);
    CHECK_EQUAL(FindRes_single, single);
    CHECK_EQUAL(0, found.payload);

    // A value present several times: the payload is used as a ref to a list of keys
    const FindRes multiple = ndx.find_all_no_copy(s4, found);
    CHECK_EQUAL(FindRes_column, multiple);
    BPlusTree<ObjKey> matches(Allocator::get_default());
    matches.init_from_ref(ref_type(found.payload));

    CHECK_EQUAL(4, found.end_ndx - found.start_ndx);
    CHECK_EQUAL(4, matches.size());
    for (size_t n = 0; n < 4; ++n)
        CHECK_EQUAL(col.key(6 + n), matches.get(n));
}
8✔
622

623
// If a column contains a specific value in multiple rows, then the index will store a list of these row numbers
624
// in form of a column. If you call find_all() on an index, it will return a *reference* to that column instead
625
// of copying it to you, as a performance optimization.
626
// For every value in `ints`, find_all_no_copy() must report a single match
// when the value occurs once, and a list of matches when it occurs several
// times. The returned payloads are used as positions into `ints`.
TEST(StringIndex_FindAllNoCopy2_Int)
{
    // Create a column with duplicate values
    column<Int> test_resources;
    auto& col = test_resources.get_column();

    for (auto i : ints)
        col.add(i);

    // Create a new index on column. (The index used to be created twice in a
    // row here; the first call's result was discarded.)
    const SearchIndex& ndx = *col.create_search_index();
    InternalFindResult results;

    for (auto i : ints) {
        FindRes res = ndx.find_all_no_copy(i, results);

        // Number of times `i` occurs in the input
        size_t real = 0;
        for (auto j : ints) {
            if (i == j)
                real++;
        }

        if (real == 1) {
            CHECK_EQUAL(res, FindRes_single);
            CHECK_EQUAL(i, ints[size_t(results.payload)]);
        }
        else if (real > 1) {
            CHECK_EQUAL(FindRes_column, res);
            // With several matches the payload is used as a ref to a list
            const IntegerColumn results_column(Allocator::get_default(), ref_type(results.payload));
            CHECK_EQUAL(real, results.end_ndx - results.start_ndx);
            CHECK_EQUAL(real, results_column.size());
            for (size_t y = 0; y < real; y++)
                CHECK_EQUAL(i, ints[size_t(results_column.get(y))]);
        }
    }
}
2✔
663

664
// If a column contains a specific value in multiple rows, then the index will store a list of these row numbers
665
// in form of a column. If you call find_all() on an index, it will return a *reference* to that column instead
666
// of copying it to you, as a performance optimization.
667
// Same as StringIndex_FindAllNoCopy2_Int, but on a nullable column that also
// holds one null value after all the integers.
TEST(StringIndex_FindAllNoCopy2_IntNull)
{
    // Create a column with duplicate values
    column<Int> test_resources(true);
    auto& col = test_resources.get_column();

    // `ints` is a std::vector, so iterate over its elements. The former
    // `sizeof(ints) / sizeof(ints[0])` bound measured the vector object
    // itself rather than its element count, so only the first few values
    // were tested and the duplicate branch below was never reached.
    for (auto i : ints)
        col.add(i);
    col.add_null();

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();
    InternalFindResult results;

    for (auto i : ints) {
        FindRes res = ndx.find_all_no_copy(i, results);

        // Number of times `i` occurs in the input
        size_t real = 0;
        for (auto j : ints) {
            if (i == j)
                real++;
        }

        if (real == 1) {
            CHECK_EQUAL(res, FindRes_single);
            CHECK_EQUAL(i, ints[size_t(results.payload)]);
        }
        else if (real > 1) {
            CHECK_EQUAL(FindRes_column, res);
            // With several matches the payload is used as a ref to a list
            const IntegerColumn results2(Allocator::get_default(), ref_type(results.payload));
            CHECK_EQUAL(real, results.end_ndx - results.start_ndx);
            CHECK_EQUAL(real, results2.size());
            for (size_t y = 0; y < real; y++)
                CHECK_EQUAL(i, ints[size_t(results2.get(y))]);
        }
    }

    // The null value was added last, so it is expected at the last position
    FindRes res = ndx.find_all_no_copy(null{}, results);
    CHECK_EQUAL(FindRes_single, res);
    CHECK_EQUAL(results.payload, col.size() - 1);
}
2✔
708

709
// Runs find_all_no_copy() on strings that differ only in their last character
// and share a long prefix of 'a's. Prefix lengths of exactly
// StringIndex::s_max_offset, one more than that, and half of it are tried.
TEST_TYPES(StringIndex_FindAllNoCopyCommonPrefixStrings, string_column, nullable_string_column, enum_column,
           nullable_enum_column)
{
    TEST_TYPE test_resources;
    auto& col = test_resources.get_column();
    const SearchIndex& ndx = *col.create_search_index();

    auto test_prefix_find = [&](std::string prefix) {
        // Four strings that are identical up to the final character
        const std::string with_b = prefix + "b";
        const std::string with_c = prefix + "c";
        const std::string with_d = prefix + "d";
        const std::string with_e = prefix + "e";
        const StringData spb(with_b);
        const StringData spc(with_c);
        const StringData spd(with_d);
        const StringData spe(with_e);

        // Objects added below occupy positions first_row .. first_row + 5:
        // one "…b", two "…c", three "…e". "…d" is deliberately never added.
        const size_t first_row = col.size();
        col.add(spb);
        for (int n = 0; n < 2; ++n)
            col.add(spc);
        for (int n = 0; n < 3; ++n)
            col.add(spe);

        InternalFindResult found;

        // Exactly one match
        FindRes outcome = ndx.find_all_no_copy(spb, found);
        CHECK_EQUAL(outcome, FindRes_single);
        CHECK_EQUAL(found.payload, first_row);

        // Two matches
        outcome = ndx.find_all_no_copy(spc, found);
        CHECK_EQUAL(outcome, FindRes_column);
        CHECK_EQUAL(found.end_ndx - found.start_ndx, 2);
        const IntegerColumn results_c(Allocator::get_default(), ref_type(found.payload));
        CHECK_EQUAL(results_c.get(found.start_ndx), first_row + 1);
        CHECK_EQUAL(results_c.get(found.start_ndx + 1), first_row + 2);
        CHECK_EQUAL(col.get(size_t(results_c.get(found.start_ndx))), spc);
        CHECK_EQUAL(col.get(size_t(results_c.get(found.start_ndx + 1))), spc);

        // No match
        outcome = ndx.find_all_no_copy(spd, found);
        CHECK_EQUAL(outcome, FindRes_not_found);

        // Three matches
        outcome = ndx.find_all_no_copy(spe, found);
        CHECK_EQUAL(outcome, FindRes_column);
        CHECK_EQUAL(found.end_ndx - found.start_ndx, 3);
        const IntegerColumn results_e(Allocator::get_default(), ref_type(found.payload));
        CHECK_EQUAL(results_e.get(found.start_ndx), first_row + 3);
        CHECK_EQUAL(results_e.get(found.start_ndx + 1), first_row + 4);
        CHECK_EQUAL(results_e.get(found.start_ndx + 2), first_row + 5);
        CHECK_EQUAL(col.get(size_t(results_e.get(found.start_ndx))), spe);
        CHECK_EQUAL(col.get(size_t(results_e.get(found.start_ndx + 1))), spe);
        CHECK_EQUAL(col.get(size_t(results_e.get(found.start_ndx + 2))), spe);
    };

    const std::string at_max(StringIndex::s_max_offset, 'a');
    const std::string over_max = at_max + "a";
    const std::string under_max(StringIndex::s_max_offset >> 1, 'a');

    test_prefix_find(at_max);
    test_prefix_find(over_max);
    test_prefix_find(under_max);
}
8✔
771

772
// The index must report, for every value in `ints`, the same number of
// occurrences as a plain scan over `ints`.
TEST(StringIndex_Count_Int)
{
    // Column populated with the shared integer test values (one is duplicated)
    column<Int> test_resources;
    auto col = test_resources.get_column();

    for (auto value : ints)
        col.add(value);

    // Index is created after the values are in place
    const SearchIndex& ndx = *col.create_search_index();

    for (auto needle : ints) {
        const size_t from_index = ndx.count(needle);

        // Reference count obtained by scanning the input
        size_t expected = 0;
        for (auto candidate : ints) {
            if (candidate == needle)
                ++expected;
        }

        CHECK_EQUAL(expected, from_index);
    }
}
2✔
796

797

798
// An index over the integer test values must report duplicates, since one of
// the values is present twice.
TEST(StringIndex_Distinct_Int)
{
    column<Int> test_resources;
    auto col = test_resources.get_column();

    for (auto value : ints)
        col.add(value);

    // Index is created after the values are in place
    const SearchIndex* const index = col.create_search_index();
    CHECK(index->has_duplicate_values());
}
2✔
811

812

813
// Walks an integer index through set, erase, insert, add and clear and checks
// after each step which object find_first() resolves to.
TEST(StringIndex_Set_Add_Erase_Insert_Int)
{
    column<Int> test_resources;
    auto col = test_resources.get_column();

    // Positions: 0 = 1, 1 = 2, 2 = 3, 3 = 2
    const int64_t initial[] = {1, 2, 3, 2};
    for (const int64_t value : initial)
        col.add(value);

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    // First 2 is at position 1
    ObjKey match = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(1), match);

    // Overwrite it: the remaining 2 is at position 3
    col.set(1, 5);

    match = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(3), match);

    // Erase position 1: the 2 moves down to position 2
    col.erase(1);

    match = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(2), match);

    // Insert at position 1: the 2 is back at position 3
    col.insert(1, 5);
    CHECK_EQUAL(col.get(1), 5);

    match = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(3), match);

    // Append a value and then change it
    col.add(7);
    CHECK_EQUAL(col.get(4), 7);
    col.set(4, 10);
    CHECK_EQUAL(col.get(4), 10);

    match = ndx.find_first(int64_t(10));
    CHECK_EQUAL(col.key(col.size() - 1), match);

    // Append another value
    col.add(9);
    match = ndx.find_first(int64_t(9));
    CHECK_EQUAL(col.key(col.size() - 1), match);

    // After clearing nothing can be found any more
    col.clear();
    match = ndx.find_first(int64_t(2));
    CHECK_EQUAL(null_key, match);
}
2✔
861

862
// Fills an indexed column with random integers and then compares lookups,
// for present and for random values, with a linear scan of the column.
TEST(StringIndex_FuzzyTest_Int)
{
    column<Int> test_resources;
    auto col = test_resources.get_column();
    Random random(random_int<unsigned long>());
    const size_t n = static_cast<size_t>(1.2 * REALM_MAX_BPNODE_SIZE);

    // The index exists before any value is added
    col.create_search_index();

    for (size_t added = 0; added < n; ++added) {
        col.add(random.draw_int_max(0xffffffffffffffff));
    }

    for (size_t round = 0; round < n; ++round) {
        // Either a value known to be in the column or a fresh random one
        const int64_t needle = random.draw_bool()
                                   ? col.get(round)
                                   : static_cast<int64_t>(random.draw_int_max(0xffffffffffffffff));

        const size_t reported = col.find_first(needle);

        // Only the first position holding `needle` (if any) is compared
        size_t pos = 0;
        while (pos < n) {
            if (col.get(pos) == needle) {
                CHECK_EQUAL(pos, reported);
                break;
            }
            ++pos;
        }
    }
}
2✔
891

892
namespace {
893

894
// Generate string where the bit pattern in bits is converted to NUL bytes. E.g. (length=2):
895
// bits=0 -> "\0\0", bits=1 -> "\x\0", bits=2 -> "\0\x", bits=3 -> "\x\x", where x is a random byte
896
StringData create_string_with_nuls(const size_t bits, const size_t length, char* tmp, Random& random)
897
{
×
898
    for (size_t i = 0; i < length; ++i) {
×
899
        bool insert_nul_at_pos = (bits & (size_t(1) << i)) == 0;
×
900
        if (insert_nul_at_pos) {
×
901
            tmp[i] = '\0';
×
902
        }
×
903
        else {
×
904
            // Avoid stray \0 chars, since we are already testing all combinations.
905
            // All casts are necessary to preserve the bitpattern.
906
            tmp[i] = static_cast<char>(static_cast<unsigned char>(random.draw_int<unsigned int>(1, UCHAR_MAX)));
×
907
        }
×
908
    }
×
909
    return StringData(tmp, length);
×
910
}
×
911

912
} // anonymous namespace
913

914

915
// Test for generated strings of length 1..16 with all combinations of embedded NUL bytes
916
TEST_TYPES_IF(StringIndex_EmbeddedZeroesCombinations, TEST_DURATION > 1, string_column, nullable_string_column)
917
{
×
918
    TEST_TYPE test_resources;
×
919
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
×
920
    const SearchIndex& ndx = *col.create_search_index();
×
921

922
    constexpr unsigned int seed = 42;
×
923
    const size_t MAX_LENGTH = 16; // Test medium
×
924
    char tmp[MAX_LENGTH];         // this is a bit of a hack, that relies on the string being copied in column.add()
×
925

926
    for (size_t length = 1; length <= MAX_LENGTH; ++length) {
×
927

928
        {
×
929
            Random random(seed);
×
930
            const size_t combinations = size_t(1) << length;
×
931
            for (size_t i = 0; i < combinations; ++i) {
×
932
                StringData str = create_string_with_nuls(i, length, tmp, random);
×
933
                col.add(str);
×
934
            }
×
935
        }
×
936

937
        // check index up to this length
938
        size_t expected_index = 0;
×
939
        for (size_t l = 1; l <= length; ++l) {
×
940
            Random random(seed);
×
941
            const size_t combinations = size_t(1) << l;
×
942
            for (size_t i = 0; i < combinations; ++i) {
×
943
                StringData needle = create_string_with_nuls(i, l, tmp, random);
×
944
                CHECK_EQUAL(ndx.find_first(needle), col.key(expected_index));
×
945
                CHECK(strncmp(col.get(expected_index).data(), needle.data(), l) == 0);
×
946
                CHECK_EQUAL(col.get(expected_index).size(), needle.size());
×
947
                expected_index++;
×
948
            }
×
949
        }
×
950
    }
×
951
}
×
952

953
// Tests for a bug with strings containing zeroes
954
TEST_TYPES(StringIndex_EmbeddedZeroes, string_column, nullable_string_column, enum_column, nullable_enum_column)
955
{
8✔
956
    TEST_TYPE test_resources;
8✔
957
    typename TEST_TYPE::ColumnTestType& col2 = test_resources.get_column();
8✔
958
    const SearchIndex& ndx2 = *col2.create_search_index();
8✔
959

4✔
960
    // FIXME: re-enable once embedded nuls work
4✔
961
    col2.add(StringData("\0", 1));
8✔
962
    col2.add(StringData("\1", 1));
8✔
963
    col2.add(StringData("\0\0", 2));
8✔
964
    col2.add(StringData("\0\1", 2));
8✔
965
    col2.add(StringData("\1\0", 2));
8✔
966

4✔
967
    CHECK_EQUAL(ndx2.find_first(StringData("\0", 1)), col2.key(0));
8✔
968
    CHECK_EQUAL(ndx2.find_first(StringData("\1", 1)), col2.key(1));
8✔
969
    CHECK_EQUAL(ndx2.find_first(StringData("\2", 1)), null_key);
8✔
970
    CHECK_EQUAL(ndx2.find_first(StringData("\0\0", 2)), col2.key(2));
8✔
971
    CHECK_EQUAL(ndx2.find_first(StringData("\0\1", 2)), col2.key(3));
8✔
972
    CHECK_EQUAL(ndx2.find_first(StringData("\1\0", 2)), col2.key(4));
8✔
973
    CHECK_EQUAL(ndx2.find_first(StringData("\1\0\0", 3)), null_key);
8✔
974

4✔
975
    // Integer index (uses String index internally)
4✔
976
    int64_t v = 1ULL << 41;
8✔
977
    column<Int> test_resources_1;
8✔
978
    auto col = test_resources_1.get_column();
8✔
979
    const SearchIndex& ndx = *col.create_search_index();
8✔
980
    col.add(1ULL << 40);
8✔
981
    auto f = ndx.find_first(v);
8✔
982
    CHECK_EQUAL(f, null_key);
8✔
983
}
8✔
984

985
TEST_TYPES(StringIndex_Null, nullable_string_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& column_under_test = test_resources.get_column();

    // Row 0 holds the empty string, row 1 holds null.
    column_under_test.add("");
    column_under_test.add(realm::null());

    // The index is created after the rows exist.
    const SearchIndex& index = *column_under_test.create_search_index();

    // Searching for null must report the null row, not the empty-string row.
    auto null_row_key = index.find_first(realm::null());
    CHECK_EQUAL(null_row_key, column_under_test.key(1));
}
4✔
998

999

1000
TEST_TYPES(StringIndex_Zero_Crash, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    // StringIndex could crash if strings ended with one or more 0-bytes
    bool nullable = TEST_TYPE::is_nullable();
    Table table;
    auto col = table.add_column(type_String, "strings", nullable);

    // Three values that differ only in the number of trailing 0-bytes, stored before the index exists.
    auto key_empty = table.create_object().set(col, StringData("")).get_key();
    auto key_one_zero = table.create_object().set(col, StringData("\0", 1)).get_key();
    auto key_two_zeroes = table.create_object().set(col, StringData("\0\0", 2)).get_key();
    table.add_search_index(col);

    if (TEST_TYPE::is_enumerated()) {
        table.enumerate_string_column(col);
    }

    // Each value must resolve to the object it was stored in.
    const ObjKey found_empty = table.find_first_string(col, StringData(""));
    CHECK_EQUAL(key_empty, found_empty);

    const ObjKey found_one_zero = table.find_first_string(col, StringData("\0", 1));
    CHECK_EQUAL(key_one_zero, found_one_zero);

    const ObjKey found_two_zeroes = table.find_first_string(col, StringData("\0\0", 2));
    CHECK_EQUAL(key_two_zeroes, found_two_zeroes);
}
8✔
1027

1028
TEST_TYPES(StringIndex_Zero_Crash2, std::true_type, std::false_type)
1029
{
4✔
1030
    Random random(random_int<unsigned long>());
4✔
1031

2✔
1032
    constexpr bool add_common_prefix = TEST_TYPE::value;
4✔
1033

2✔
1034
    for (size_t iter = 0; iter < 10 + TEST_DURATION * 100; iter++) {
44✔
1035
        // StringIndex could crash if strings ended with one or more 0-bytes
20✔
1036
        Table table;
40✔
1037
        auto col = table.add_column(type_String, "string", true);
40✔
1038

20✔
1039
        table.add_search_index(col);
40✔
1040

20✔
1041
        for (size_t i = 0; i < 100 + TEST_DURATION * 1000; i++) {
4,040✔
1042
            unsigned char action = static_cast<unsigned char>(random.draw_int_max<unsigned int>(100));
4,000✔
1043
            if (action == 0) {
4,000✔
1044
                table.remove_search_index(col);
33✔
1045
                table.add_search_index(col);
33✔
1046
            }
33✔
1047
            else if (action > 48 && table.size() < 10) {
3,967✔
1048
                // Generate string with equal probability of being empty, null, short, medium and long, and with
987✔
1049
                // their contents having equal proability of being either random or a duplicate of a previous
987✔
1050
                // string. When it's random, each char must have equal probability of being 0 or non-0e
987✔
1051
                static std::string buf =
1,972✔
1052
                    "This string is around 90 bytes long, which falls in the long-string type of Realm strings";
1,972✔
1053

987✔
1054
                std::string copy = buf;
1,972✔
1055

987✔
1056
                static std::string buf2 =
1,972✔
1057
                    "                                                                                         ";
1,972✔
1058
                std::string copy2 = buf2;
1,972✔
1059
                StringData sd;
1,972✔
1060

987✔
1061
                size_t len = random.draw_int_max<size_t>(3);
1,972✔
1062
                if (len == 0)
1,972✔
1063
                    len = 0;
470✔
1064
                else if (len == 1)
1,502✔
1065
                    len = 7;
499✔
1066
                else if (len == 2)
1,003✔
1067
                    len = 27;
495✔
1068
                else
508✔
1069
                    len = random.draw_int_max<size_t>(90);
508✔
1070

987✔
1071
                copy = copy.substr(0, len);
1,972✔
1072
                if (add_common_prefix) {
1,972✔
1073
                    std::string prefix(StringIndex::s_max_offset, 'a');
982✔
1074
                    copy = prefix + copy;
982✔
1075
                }
982✔
1076

987✔
1077
                if (random.draw_int_max<int>(1) == 0) {
1,972✔
1078
                    // duplicate string
471✔
1079
                    sd = StringData(copy);
988✔
1080
                }
988✔
1081
                else {
984✔
1082
                    // random string
516✔
1083
                    for (size_t t = 0; t < len; t++) {
21,132✔
1084
                        if (random.draw_int_max<int>(100) > 20)
20,148✔
1085
                            copy2[t] = 0; // zero byte
15,981✔
1086
                        else
4,167✔
1087
                            copy2[t] = static_cast<char>(random.draw_int<int>()); // random byte
4,167✔
1088
                    }
20,148✔
1089
                    // no generated string can equal "null" (our vector magic value for null) because
516✔
1090
                    // len == 4 is not possible
516✔
1091
                    copy2 = copy2.substr(0, len);
984✔
1092
                    if (add_common_prefix) {
984✔
1093
                        std::string prefix(StringIndex::s_max_offset, 'a');
486✔
1094
                        copy2 = prefix + copy2;
486✔
1095
                    }
486✔
1096
                    sd = StringData(copy2);
984✔
1097
                }
984✔
1098

987✔
1099
                bool done = false;
1,972✔
1100
                do {
1,972✔
1101
                    int64_t key_val = random.draw_int_max<int64_t>(10000);
1,972✔
1102
                    try {
1,972✔
1103
                        table.create_object(ObjKey(key_val)).set(col, sd);
1,972✔
1104
                        done = true;
1,972✔
1105
                    }
1,972✔
1106
                    catch (...) {
987✔
UNCOV
1107
                    }
×
1108
                } while (!done);
1,972✔
1109
                table.verify();
1,972✔
1110
            }
1,972✔
1111
            else if (table.size() > 0) {
1,995✔
1112
                // delete
870✔
1113
                size_t row = random.draw_int_max<size_t>(table.size() - 1);
1,738✔
1114
                Obj obj = table.get_object(row);
1,738✔
1115
                obj.remove();
1,738✔
1116
            }
1,738✔
1117

2,000✔
1118
            action = static_cast<unsigned char>(random.draw_int_max<unsigned int>(100));
4,000✔
1119
            if (table.size() > 0) {
4,000✔
1120
                // Search for value that exists
1,735✔
1121
                size_t row = random.draw_int_max<size_t>(table.size() - 1);
3,487✔
1122
                Obj obj = table.get_object(row);
3,487✔
1123
                StringData sd = obj.get<String>(col);
3,487✔
1124
                ObjKey t = table.find_first_string(col, sd);
3,487✔
1125
                StringData sd2 = table.get_object(t).get<String>(col);
3,487✔
1126
                CHECK_EQUAL(sd, sd2);
3,487✔
1127
            }
3,487✔
1128
        }
4,000✔
1129
    }
40✔
1130
}
4✔
1131

1132
TEST(StringIndex_Integer_Increasing)
{
    // Fill an indexed integer column with random values and verify that count_int() agrees with
    // a std::multiset holding the same values.
    const size_t rows = 2000 + 1000000 * TEST_DURATION;

    Table table;
    auto col = table.add_column(type_Int, "int");
    table.add_search_index(col);

    std::multiset<int64_t> reference;

    size_t inserted = 0;
    while (inserted < rows) {
        int64_t value = fastrand((TEST_DURATION == 0) ? 2000 : 0x100000);
        table.create_object().set(col, value);
        reference.insert(value);
        ++inserted;
    }

    // Every stored value must be counted exactly as often as it was inserted.
    for (auto obj : table) {
        int64_t stored = obj.get<Int>(col);
        size_t counted = table.count_int(col, stored);
        size_t expected = reference.count(stored);
        CHECK_EQUAL(counted, expected);
    }
}
2✔
1156

1157
TEST_TYPES(StringIndex_Duplicate_Values, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    col.add(s1);
    col.add(s2);
    col.add(s3);
    col.add(s4);

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    // Single place for the recurring "does the index currently report duplicates?" check.
    auto expect_duplicates = [&](bool expected) {
        CHECK(ndx.has_duplicate_values() == expected);
    };

    expect_duplicates(false);

    col.add(s1); // duplicate value
    expect_duplicates(true);

    // remove and test again.
    col.erase(4);
    expect_duplicates(false);
    col.add(s1);
    expect_duplicates(true);
    col.erase(0);
    expect_duplicates(false);
    col.clear();

    // check emptied set
    CHECK(ndx.is_empty());
    expect_duplicates(false);

    const size_t num_rows = 100;

    // The decimal strings "0".."99" are all distinct.
    for (size_t number = 0; number < num_rows; ++number) {
        std::string to_insert(util::to_string(number));
        col.add(to_insert);
    }
    expect_duplicates(false);

    // "a", "aa", "aaa", ... are distinct as well; a_string ends up one 'a' longer than the last
    // value added by this loop.
    std::string a_string = "a";
    for (size_t count = 0; count < num_rows; ++count) {
        col.add(a_string);
        a_string += "a";
    }
    std::string str_num_rows(util::to_string(num_rows));
    expect_duplicates(false);

    // Adding that longest string twice creates a duplicate; erasing the last row removes it again.
    col.add(a_string);
    col.add(a_string);
    expect_duplicates(true);
    col.erase(col.size() - 1);
    expect_duplicates(false);

    // Insert into the middle unique value of num_rows
    col.insert(num_rows / 2, str_num_rows);
    expect_duplicates(false);

    // Set the next element to be num_rows too
    col.set(num_rows / 2 + 1, str_num_rows);
    expect_duplicates(true);

    col.clear();
    expect_duplicates(false);
    CHECK(col.size() == 0);
}
8✔
1224

1225
TEST_TYPES(StringIndex_MaxBytes, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    // Three all-'a' strings: half of, exactly, and one more than StringIndex::s_max_offset bytes.
    std::string std_under_max(StringIndex::s_max_offset >> 1, 'a');
    std::string std_max(StringIndex::s_max_offset, 'a');
    std::string std_over_max(std_max + "a");
    StringData under_max(std_under_max);
    StringData max(std_max);
    StringData over_max(std_over_max);

    const SearchIndex& ndx = *col.create_search_index();

    CHECK_EQUAL(col.size(), 0);

    // Starting from an empty column, add `copies` rows holding `value`, check what the column and
    // index report, then empty the column again.
    auto duplicate_check = [&](size_t copies, StringData value) {
        CHECK(col.size() == 0);
        size_t added = 0;
        while (added < copies) {
            col.add(value);
            ++added;
        }
        CHECK_EQUAL(col.size(), copies);
        CHECK(ndx.has_duplicate_values() == (copies > 1));
        CHECK_EQUAL(col.get(0), value);
        CHECK_EQUAL(col.count(value), copies);
        CHECK_EQUAL(col.find_first(value), 0);
        col.clear();
    };

    // Row counts on both sides of REALM_MAX_BPNODE_SIZE, plus two small ones.
    std::vector<size_t> num_duplicates_list = {
        1, 10, REALM_MAX_BPNODE_SIZE - 1, REALM_MAX_BPNODE_SIZE, REALM_MAX_BPNODE_SIZE + 1,
    };
    for (size_t which = 0; which < num_duplicates_list.size(); ++which) {
        const size_t copies = num_duplicates_list[which];
        duplicate_check(copies, under_max);
        duplicate_check(copies, max);
        duplicate_check(copies, over_max);
    }
}
8✔
1263

1264

1265
// There is a corner case where two very long strings are
1266
// inserted into the string index which are identical except
1267
// for the characters at the end (they have an identical very
1268
// long prefix). This was causing a stack overflow because of
1269
// the recursive nature of the insert function.
1270
TEST_TYPES(StringIndex_InsertLongPrefix, string_column, nullable_string_column, enum_column, nullable_enum_column)
1271
{
8✔
1272
    TEST_TYPE test_resources;
8✔
1273
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1274
    const SearchIndex& ndx = *col.create_search_index();
8✔
1275

4✔
1276
    col.add("test_index_string1");
8✔
1277
    col.add("test_index_string2");
8✔
1278

4✔
1279
    CHECK_EQUAL(col.find_first("test_index_string1"), 0);
8✔
1280
    CHECK_EQUAL(col.find_first("test_index_string2"), 1);
8✔
1281

4✔
1282
    std::string std_base(107, 'a');
8✔
1283
    std::string std_base_b = std_base + "b";
8✔
1284
    std::string std_base_c = std_base + "c";
8✔
1285
    StringData base_b(std_base_b);
8✔
1286
    StringData base_c(std_base_c);
8✔
1287
    col.add(base_b);
8✔
1288
    ndx.verify();
8✔
1289
    col.add(base_c);
8✔
1290
    ndx.verify();
8✔
1291

4✔
1292
    CHECK_EQUAL(col.find_first(base_b), 2);
8✔
1293
    CHECK_EQUAL(col.find_first(base_c), 3);
8✔
1294

4✔
1295
    // To trigger the bug, the length must be more than 10000.
4✔
1296
    // Array::destroy_deep() will stack overflow at around recursion depths of
4✔
1297
    // lengths > 90000 on mac and less on android devices.
4✔
1298
    std::string std_base2(100000, 'a');
8✔
1299
    std::string std_base2_b = std_base2 + "b";
8✔
1300
    std::string std_base2_c = std_base2 + "c";
8✔
1301
    StringData base2(std_base2);
8✔
1302
    StringData base2_b(std_base2_b);
8✔
1303
    StringData base2_c(std_base2_c);
8✔
1304
    col.add(base2_b);
8✔
1305
    ndx.verify();
8✔
1306
    col.add(base2_c);
8✔
1307
    ndx.verify();
8✔
1308

4✔
1309
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1310
    CHECK_EQUAL(col.find_first(base2_c), 5);
8✔
1311

4✔
1312
    col.add(base2);
8✔
1313
    CHECK(!ndx.has_duplicate_values());
8✔
1314
    ndx.verify();
8✔
1315
    col.add(base2_b); // adds a duplicate in the middle of the list
8✔
1316

4✔
1317
    CHECK(ndx.has_duplicate_values());
8✔
1318
    std::vector<ObjKey> find_all_result;
8✔
1319
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1320
    ndx.find_all(find_all_result, base2_b);
8✔
1321
    CHECK_EQUAL(find_all_result.size(), 2);
8✔
1322
    CHECK_EQUAL(find_all_result[0], col.key(4));
8✔
1323
    CHECK_EQUAL(find_all_result[1], col.key(7));
8✔
1324
    find_all_result.clear();
8✔
1325
    CHECK_EQUAL(ndx.count(base2_b), 2);
8✔
1326
    col.verify();
8✔
1327

4✔
1328
    col.erase(7);
8✔
1329
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1330
    CHECK_EQUAL(ndx.count(base2_b), 1);
8✔
1331
    ndx.find_all(find_all_result, base2_b);
8✔
1332
    CHECK_EQUAL(find_all_result.size(), 1);
8✔
1333
    CHECK_EQUAL(find_all_result[0], col.key(4));
8✔
1334
    find_all_result.clear();
8✔
1335
    col.verify();
8✔
1336

4✔
1337
    col.set(6, base2_b);
8✔
1338
    CHECK_EQUAL(ndx.count(base2_b), 2);
8✔
1339
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1340
    ndx.find_all(find_all_result, base2_b);
8✔
1341
    CHECK_EQUAL(find_all_result.size(), 2);
8✔
1342
    CHECK_EQUAL(find_all_result[0], col.key(4));
8✔
1343
    CHECK_EQUAL(find_all_result[1], col.key(6));
8✔
1344
    col.verify();
8✔
1345

4✔
1346
    col.clear(); // calls recursive function Array::destroy_deep()
8✔
1347
}
8✔
1348

1349
TEST_TYPES(StringIndex_InsertLongPrefixAndQuery, string_column, nullable_string_column, enum_column,
           nullable_enum_column)
{
    constexpr int half_node_size = REALM_MAX_BPNODE_SIZE / 2;
    // Number of objects created for each of the three main values.
    constexpr int copies_per_value = half_node_size * 3;

    bool nullable_column = TEST_TYPE::is_nullable();
    Group g;
    auto t = g.add_table("StringsOnly");
    auto col = t->add_column(type_String, "first", nullable_column);
    t->add_search_index(col);

    // All values share a prefix of StringIndex::s_max_offset 'a' characters and differ only in
    // the five characters that follow it.
    std::string base(StringIndex::s_max_offset, 'a');
    std::string str_a = base + "aaaaa";
    std::string str_a0 = base + "aaaa0";
    std::string str_ax = base + "aaaax";
    std::string str_b = base + "bbbbb";
    std::string str_c = base + "ccccc";
    std::string str_c0 = base + "cccc0";
    std::string str_cx = base + "ccccx";

    for (int round = 0; round < copies_per_value; round++) {
        t->create_object().set(col, str_a);
        t->create_object().set(col, str_b);
        t->create_object().set(col, str_c);
    }
    t->create_object().set(col, str_ax);
    t->create_object().set(col, str_ax);
    t->create_object().set(col, str_a0);
    /*
    {
        std::ofstream o("index.dot");
        index->to_dot(o, "");
    }
    */
    if (TEST_TYPE::is_enumerated()) {
        t->enumerate_string_column(col);
    }

    // A stored value: first match, count and find_all must all agree.
    auto first_a = t->where().equal(col, StringData(str_a)).find();
    auto count_a = t->count_string(col, StringData(str_a));
    auto all_a = t->where().equal(col, StringData(str_a)).find_all();
    CHECK_EQUAL(first_a, ObjKey(0));
    CHECK_EQUAL(count_a, copies_per_value);
    CHECK_EQUAL(all_a.size(), copies_per_value);

    // str_c0 and str_cx were never stored, so find() must not report a match for them.
    auto first_c0 = t->where().equal(col, StringData(str_c0)).find();
    CHECK_EQUAL(first_c0, null_key);
    auto first_cx = t->where().equal(col, StringData(str_cx)).find();
    CHECK_EQUAL(first_cx, null_key);

    // Find string that is 'less' than strings in the table, but with identical last key
    auto all_c0 = t->where().equal(col, StringData(str_c0)).find_all();
    CHECK_EQUAL(all_c0.size(), 0);
    // Find string that is 'greater' than strings in the table, but with identical last key
    auto all_cx = t->where().equal(col, StringData(str_cx)).find_all();
    CHECK_EQUAL(all_cx.size(), 0);

    // Same as above, but just for 'count' method
    auto count_c0 = t->count_string(col, StringData(str_c0));
    CHECK_EQUAL(count_c0, 0);
    auto count_cx = t->count_string(col, StringData(str_cx));
    CHECK_EQUAL(count_cx, 0);
}
8✔
1408

1409

1410
TEST(StringIndex_Fuzzy)
1411
{
2✔
1412
    constexpr size_t chunkcount = 50;
2✔
1413
    constexpr size_t rowcount = 100 + 1000 * TEST_DURATION;
2✔
1414

1✔
1415
    for (size_t main_rounds = 0; main_rounds < 2 + 10 * TEST_DURATION; main_rounds++) {
6✔
1416

2✔
1417
        Group g;
4✔
1418

2✔
1419
        auto t = g.add_table("StringsOnly");
4✔
1420
        auto col0 = t->add_column(type_String, "first");
4✔
1421
        auto col1 = t->add_column(type_String, "second");
4✔
1422

2✔
1423
        t->add_search_index(col0);
4✔
1424

2✔
1425
        std::string strings[chunkcount];
4✔
1426

2✔
1427
        for (size_t j = 0; j < chunkcount; j++) {
204✔
1428
            size_t len = fastrand() % REALM_MAX_BPNODE_SIZE;
200✔
1429

100✔
1430
            for (size_t i = 0; i < len; i++)
105,408✔
1431
                strings[j] += char(fastrand());
105,208✔
1432
        }
200✔
1433

2✔
1434
        for (size_t rows = 0; rows < rowcount; rows++) {
404✔
1435
            // Strings consisting of 2 concatenated strings are very interesting
200✔
1436
            size_t chunks;
400✔
1437
            if (fastrand() % 2 == 0)
400✔
1438
                chunks = fastrand() % 4;
200✔
1439
            else
200✔
1440
                chunks = 2;
200✔
1441

200✔
1442
            std::string str;
400✔
1443

200✔
1444
            for (size_t c = 0; c < chunks; c++) {
1,092✔
1445
                str += strings[fastrand() % chunkcount];
692✔
1446
            }
692✔
1447

200✔
1448
            t->create_object().set_all(str, str);
400✔
1449
        }
400✔
1450

2✔
1451
        for (size_t rounds = 0; rounds < 1 + 10 * TEST_DURATION; rounds++) {
8✔
1452
            for (auto obj : *t) {
400✔
1453

200✔
1454
                TableView tv0 = (t->column<String>(col0) == obj.get<String>(col0)).find_all();
400✔
1455
                TableView tv1 = (t->column<String>(col1) == obj.get<String>(col1)).find_all();
400✔
1456

200✔
1457
                CHECK_EQUAL(tv0.size(), tv1.size());
400✔
1458

200✔
1459
                for (size_t v = 0; v < tv0.size(); v++) {
1,400✔
1460
                    CHECK_EQUAL(tv0.get_key(v), tv1.get_key(v));
1,000✔
1461
                }
1,000✔
1462
            }
400✔
1463

2✔
1464

2✔
1465
            for (size_t r = 0; r < 5 + 1000 * TEST_DURATION; r++) {
24✔
1466
                size_t chunks;
20✔
1467
                if (fastrand() % 2 == 0)
20✔
1468
                    chunks = fastrand() % 4;
11✔
1469
                else
9✔
1470
                    chunks = 2;
9✔
1471

10✔
1472
                std::string str;
20✔
1473

10✔
1474
                for (size_t c = 0; c < chunks; c++) {
52✔
1475
                    str += strings[fastrand() % chunkcount];
32✔
1476
                }
32✔
1477

10✔
1478
                TableView tv0 = (t->column<String>(col0) == str).find_all();
20✔
1479
                TableView tv1 = (t->column<String>(col1) == str).find_all();
20✔
1480

10✔
1481
                CHECK_EQUAL(tv0.size(), tv1.size());
20✔
1482

10✔
1483
                for (size_t v = 0; v < tv0.size(); v++) {
70✔
1484
                    CHECK_EQUAL(tv0.get_key(v), tv1.get_key(v));
50✔
1485
                }
50✔
1486
            }
20✔
1487
            if (t->size() > 10)
4✔
1488
                t->get_object(0).remove();
4✔
1489

2✔
1490
            size_t r1 = fastrand() % t->size();
4✔
1491
            size_t r2 = fastrand() % t->size();
4✔
1492

2✔
1493
            std::string str = t->get_object(r2).get<String>(col0);
4✔
1494
            Obj obj = t->get_object(r1);
4✔
1495
            obj.set<String>(col0, StringData(str));
4✔
1496
            obj.set<String>(col1, StringData(str));
4✔
1497
        }
4✔
1498
    }
4✔
1499
}
2✔
1500

1501
namespace {
1502

1503
// results returned by the index should be in ascending row order
1504
// this requirement is assumed by the query system which runs find_gte
1505
// and this will return wrong results unless the results are ordered
1506
void check_result_order(const std::vector<ObjKey>& results, TestContext& test_context)
1507
{
88✔
1508
    const size_t num_results = results.size();
88✔
1509
    for (size_t i = 1; i < num_results; ++i) {
288✔
1510
        CHECK(results[i - 1] < results[i]);
200✔
1511
    }
200✔
1512
}
88✔
1513

1514
} // end anonymous namespace
1515

1516

1517
TEST_TYPES(StringIndex_Insensitive, string_column, nullable_string_column, enum_column, nullable_enum_column)
1518
{
8✔
1519
    TEST_TYPE test_resources;
8✔
1520
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1521

4✔
1522
    const char* strings[] = {
8✔
1523
        "john", "John", "jOhn", "JOhn", "joHn", "JoHn", "jOHn", "JOHn", "johN", "JohN", "jOhN", "JOhN", "joHN", "JoHN", "jOHN", "JOHN", "john" /* yes, an extra to test the "bucket" case as well */,
8✔
1524
        "hans", "Hansapark", "george", "billion dollar startup",
8✔
1525
        "abcde", "abcdE", "Abcde", "AbcdE",
8✔
1526
        "common", "common"
8✔
1527
    };
8✔
1528

4✔
1529
    for (const char* string : strings) {
216✔
1530
        col.add(string);
216✔
1531
    }
216✔
1532

4✔
1533
    // Generate 255 strings with 1..255 'a' chars
4✔
1534
    for (int i = 1; i < 256; ++i) {
2,048✔
1535
        col.add(std::string(i, 'a').c_str());
2,040✔
1536
    }
2,040✔
1537

4✔
1538
    // Create a new index on column
4✔
1539
    const SearchIndex& ndx = *col.create_search_index();
8✔
1540

4✔
1541
    std::vector<ObjKey> results;
8✔
1542
    {
8✔
1543
        // case sensitive
4✔
1544
        ndx.find_all(results, strings[0]);
8✔
1545
        CHECK_EQUAL(2, results.size());
8✔
1546
        CHECK_EQUAL(col.get(results[0]), strings[0]);
8✔
1547
        CHECK_EQUAL(col.get(results[1]), strings[0]);
8✔
1548
        check_result_order(results, test_context);
8✔
1549
        results.clear();
8✔
1550
    }
8✔
1551

4✔
1552
    {
8✔
1553
        constexpr bool case_insensitive = true;
8✔
1554
        const char* needle = "john";
8✔
1555
        auto upper_needle = case_map(needle, true);
8✔
1556
        ndx.find_all(results, needle, case_insensitive);
8✔
1557
        CHECK_EQUAL(17, results.size());
8✔
1558
        for (size_t i = 0; i < results.size(); ++i) {
144✔
1559
            auto upper_result = case_map(col.get(results[i]), true);
136✔
1560
            CHECK_EQUAL(upper_result, upper_needle);
136✔
1561

68✔
1562
        }
136✔
1563
        check_result_order(results, test_context);
8✔
1564
        results.clear();
8✔
1565
    }
8✔
1566

4✔
1567

4✔
1568
    {
8✔
1569
        struct TestData {
8✔
1570
            const bool case_insensitive;
8✔
1571
            const char* const needle;
8✔
1572
            const size_t result_size;
8✔
1573
        };
8✔
1574

4✔
1575
        TestData td[] = {
8✔
1576
            {true, "Hans", 1},
8✔
1577
            {true, "Geor", 0},
8✔
1578
            {true, "George", 1},
8✔
1579
            {true, "geoRge", 1},
8✔
1580
            {true, "Billion Dollar Startup", 1},
8✔
1581
            {true, "ABCDE", 4},
8✔
1582
            {true, "commON", 2},
8✔
1583
        };
8✔
1584

4✔
1585
        for (const TestData& t : td) {
56✔
1586
            ndx.find_all(results, t.needle, t.case_insensitive);
56✔
1587
            CHECK_EQUAL(t.result_size, results.size());
56✔
1588
            check_result_order(results, test_context);
56✔
1589
            results.clear();
56✔
1590
        }
56✔
1591
    }
8✔
1592

4✔
1593
    // Test generated 'a'-strings
4✔
1594
    for (int i = 1; i < 256; ++i) {
2,048✔
1595
        const std::string str = std::string(i, 'A');
2,040✔
1596
        ndx.find_all(results, str.c_str(), false);
2,040✔
1597
        CHECK_EQUAL(0, results.size());
2,040✔
1598
        ndx.find_all(results, str.c_str(), true);
2,040✔
1599
        CHECK_EQUAL(1, results.size());
2,040✔
1600
        results.clear();
2,040✔
1601
    }
2,040✔
1602
}
8✔
1603

1604

1605
/* Disabled until we have better support for case mapping unicode characters
1606

1607
TEST_TYPES(StringIndex_Insensitive_Unicode, non_nullable, nullable)
1608
{
1609
    constexpr bool nullable = TEST_TYPE::value;
1610

1611
    // Create a column with string values
1612
    ref_type ref = StringColumn::create(Allocator::get_default());
1613
    StringColumn col(Allocator::get_default(), ref, nullable);
1614

1615
    const char* strings[] = {
1616
        "æøå", "ÆØÅ",
1617
    };
1618

1619
    for (const char* string : strings) {
1620
        col.add(string);
1621
    }
1622

1623
    // Create a new index on column
1624
    const SearchIndex& ndx = *col.create_search_index();
1625

1626
    ref_type results_ref = IntegerColumn::create(Allocator::get_default());
1627
    IntegerColumn results(Allocator::get_default(), results_ref);
1628

1629
    {
1630
        struct TestData {
1631
            const bool case_insensitive;
1632
            const char* const needle;
1633
            const size_t result_size;
1634
        };
1635

1636
        TestData td[] = {
1637
            {false, "æøå", 1},
1638
            {false, "ÆØÅ", 1},
1639
            {true, "æøå", 2},
1640
            {true, "Æøå", 2},
1641
            {true, "æØå", 2},
1642
            {true, "ÆØå", 2},
1643
            {true, "æøÅ", 2},
1644
            {true, "ÆøÅ", 2},
1645
            {true, "æØÅ", 2},
1646
            {true, "ÆØÅ", 2},
1647
        };
1648

1649
        for (const TestData& t : td) {
1650
            ndx.find_all(results, t.needle, t.case_insensitive);
1651
            CHECK_EQUAL(t.result_size, results.size());
1652
            results.clear();
1653
        }
1654
    }
1655

1656
    // Clean up
1657
    results.destroy();
1658
    col.destroy();
1659
}
1660

1661
*/
1662

1663

1664
TEST_TYPES(StringIndex_45, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    // A case-insensitive search for five 'A' characters must not match rows that hold only four
    // 'a' characters.
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
    const SearchIndex& ndx = *col.create_search_index();

    std::string stored(4, 'a');
    std::string needle(5, 'A');

    // The four-character value is stored twice.
    for (int copy = 0; copy < 2; ++copy) {
        col.add(stored);
    }

    std::vector<ObjKey> matches;
    ndx.find_all(matches, needle.c_str(), true);
    CHECK_EQUAL(matches.size(), 0);
}
8✔
1680

1681

1682
namespace {
1683

1684
std::string create_random_a_string(size_t max_len) {
×
1685
    std::string s;
×
1686
    size_t len = size_t(fastrand(max_len));
×
1687
    for (size_t p = 0; p < len; p++) {
×
1688
        s += fastrand(1) == 0 ? 'a' : 'A';
×
1689
    }
×
1690
    return s;
×
1691
}
×
1692

1693
}
1694

1695

1696
// Excluded when run with valgrind because it takes a long time
1697
TEST_TYPES_IF(StringIndex_Insensitive_Fuzz, TEST_DURATION > 1, string_column, nullable_string_column, enum_column,
1698
              nullable_enum_column)
1699
{
×
1700
    const size_t max_str_len = 9;
×
1701
    const size_t iters = 3;
×
1702

1703
    for (size_t iter = 0; iter < iters; iter++) {
×
1704
        TEST_TYPE test_resources;
×
1705
        typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
×
1706

1707
        size_t rows = size_t(fastrand(2 * REALM_MAX_BPNODE_SIZE - 1));
×
1708

1709
        // Add 'rows' number of rows in the column
1710
        for (size_t t = 0; t < rows; t++) {
×
1711
            std::string str = create_random_a_string(max_str_len);
×
1712
            col.add(str);
×
1713
        }
×
1714

1715
        const SearchIndex& ndx = *col.create_search_index();
×
1716

1717
        for (size_t t = 0; t < 1000; t++) {
×
1718
            std::string needle = create_random_a_string(max_str_len);
×
1719

1720
            std::vector<ObjKey> res;
×
1721

1722
            ndx.find_all(res, needle.c_str(), true);
×
1723
            check_result_order(res, test_context);
×
1724

1725
            // Check that all items in 'res' point at a match in 'col'
1726
            auto needle_upper = case_map(needle, true);
×
1727
            for (size_t res_ndx = 0; res_ndx < res.size(); res_ndx++) {
×
1728
                auto res_upper = case_map(col.get(res[res_ndx]), true);
×
1729
                CHECK_EQUAL(res_upper, needle_upper);
×
1730
            }
×
1731

1732
            // Check that all matches in 'col' exist in 'res'
1733
            for (size_t col_ndx = 0; col_ndx < col.size(); col_ndx++) {
×
1734
                auto str_upper = case_map(col.get(col_ndx), true);
×
1735
                if (str_upper == needle_upper) {
×
1736
                    CHECK(std::find(res.begin(), res.end(), col.key(col_ndx)) != res.end());
×
1737
                }
×
1738
            }
×
1739
        }
×
1740
    }
×
1741
}
×
1742

1743
// Exercise the StringIndex case insensitive search for strings with very long, common prefixes
1744
// to cover the special case code paths where different strings are stored in a list.
1745
TEST_TYPES(StringIndex_Insensitive_VeryLongStrings, string_column, nullable_string_column, enum_column,
1746
           nullable_enum_column)
1747
{
8✔
1748
    TEST_TYPE test_resources;
8✔
1749
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1750
    const SearchIndex& ndx = *col.create_search_index();
8✔
1751

4✔
1752
    std::string long1 = std::string(StringIndex::s_max_offset + 10, 'a');
8✔
1753
    std::string long2 = long1 + "b";
8✔
1754
    std::string long3 = long1 + "c";
8✔
1755

4✔
1756
    // Add the strings in a "random" order
4✔
1757
    col.add(long1);
8✔
1758
    col.add(long2);
8✔
1759
    col.add(long2);
8✔
1760
    col.add(long1);
8✔
1761
    col.add(long3);
8✔
1762
    col.add(long2);
8✔
1763
    col.add(long1);
8✔
1764
    col.add(long1);
8✔
1765

4✔
1766
    std::vector<ObjKey> results;
8✔
1767

4✔
1768
    ndx.find_all(results, long1.c_str(), true);
8✔
1769
    CHECK_EQUAL(results.size(), 4);
8✔
1770
    check_result_order(results, test_context);
8✔
1771
    results.clear();
8✔
1772
    ndx.find_all(results, long2.c_str(), true);
8✔
1773
    CHECK_EQUAL(results.size(), 3);
8✔
1774
    results.clear();
8✔
1775
    ndx.find_all(results, long3.c_str(), true);
8✔
1776
    CHECK_EQUAL(results.size(), 1);
8✔
1777
    results.clear();
8✔
1778
}
8✔
1779

1780

1781
// Bug with case insensitive search on numbers that gives duplicate results
1782
TEST_TYPES(StringIndex_Insensitive_Numbers, string_column, nullable_string_column, enum_column, nullable_enum_column)
1783
{
8✔
1784
    TEST_TYPE test_resources;
8✔
1785
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1786
    const SearchIndex& ndx = *col.create_search_index();
8✔
1787

4✔
1788
    constexpr const char* number_string_16 = "1111111111111111";
8✔
1789
    constexpr const char* number_string_17 = "11111111111111111";
8✔
1790

4✔
1791
    col.add(number_string_16);
8✔
1792
    col.add(number_string_17);
8✔
1793

4✔
1794
    std::vector<ObjKey> results;
8✔
1795

4✔
1796
    ndx.find_all(results, number_string_16, true);
8✔
1797
    CHECK_EQUAL(results.size(), 1);
8✔
1798
}
8✔
1799

1800

1801
// Two entries that differ only in letter case must both be found, in the expected order,
// by a case insensitive search for a third spelling of the same word.
TEST_TYPES(StringIndex_Rover, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    // The index is created on the empty column; the values are added afterwards
    const SearchIndex& ndx = *col.create_search_index();

    for (const char* spelling : {"ROVER", "Rover"}) {
        col.add(spelling);
    }

    std::vector<ObjKey> results;
    ndx.find_all(results, "rover", true);

    CHECK_EQUAL(results.size(), 2);
    check_result_order(results, test_context);
}
8✔
1817

1818
// Run equality queries against the string primary key column of a table that holds
// a single object: one query for the stored key and one for a key that is absent.
TEST(StringIndex_QuerySingleObject)
{
    Group g;
    auto table = g.add_table_with_primary_key("class_StringClass", type_String, "name", true);
    table->create_object_with_primary_key("Foo");

    // The stored key is found ...
    auto q = table->where().equal(table->get_column_key("name"), "Foo", true);
    CHECK_EQUAL(q.count(), 1);

    // ... while a key that was never added is not
    q = table->where().equal(table->get_column_key("name"), "Bar", true);
    CHECK_EQUAL(q.count(), 0);
}
2✔
1829

1830
// Adding a search index to a Mixed column must work when the table already
// contains an object holding a string value. The test has no explicit checks;
// it passes if add_search_index() completes.
TEST(StringIndex_MixedNonEmptyTable)
{
    Group g;
    auto table = g.add_table("foo");
    auto col = table->add_column(type_Mixed, "any");

    // Populate the column first ...
    Obj obj = table->create_object();
    obj.set(col, Mixed("abcdefgh"));

    // ... and only then create the index
    table->add_search_index(col);
}
2✔
1838

1839
// An indexed Mixed column must keep query results correct when a string value is
// replaced by a nested collection and later by the same string again.
TEST(StringIndex_MixedWithNestedCollections)
{
    Group g;
    auto table = g.add_table("foo");
    auto col = table->add_column(type_Mixed, "value");
    table->add_search_index(col);

    // One object that never matches the query ...
    Obj other = table->create_object();
    other.set(col, Mixed("apple"));

    // ... and one whose value is changed during the test
    auto obj = table->create_object();
    obj.set(col, Mixed("banana"));

    auto q = table->query("value = 'banana'");

    // Initially the string is found
    CHECK_EQUAL(q.count(), 1);

    // Once the value has been turned into a dictionary there is no match any more
    obj.set_collection(col, CollectionType::Dictionary);
    CHECK_EQUAL(q.count(), 0);

    // Setting the string again makes the object match once more
    obj.set(col, Mixed("banana"));
    CHECK_EQUAL(q.count(), 1);
}
2✔
1857

1858
// An integer and a string with the same bit pattern must be kept apart by the index on a
// Mixed column. The bytes of 0x6867666564636261, read from least to most significant,
// are the characters of "abcdefgh".
TEST(StringIndex_MixedEqualBitPattern)
{
    Group g;
    auto table = g.add_table("foo");
    auto col = table->add_column(type_Mixed, "any");
    table->add_search_index(col);

    Mixed val1(int64_t(0x6867666564636261));

    // Helper creating a new object that holds the given value in the indexed column
    auto add_object = [&](Mixed value) {
        table->create_object().set(col, value);
    };

    add_object(Mixed("abcdefgh"));
    // From single value to list
    add_object(val1);

    auto tv = table->where().equal(col, val1).find_all();
    CHECK_EQUAL(tv.size(), 1);
    CHECK_EQUAL(tv.get_object(0).get_any(col), val1);

    // Start over, this time with two string objects before the integer is added
    table->clear();
    add_object(Mixed("abcdefgh"));
    add_object(Mixed("abcdefgh"));
    // Insert in existing list
    add_object(val1);

    tv = table->where().equal(col, val1).find_all();
    CHECK_EQUAL(tv.size(), 1);
    CHECK_EQUAL(tv.get_object(0).get_any(col), val1);
    tv = table->where().equal(col, Mixed("abcdefgh")).find_all();
    CHECK_EQUAL(tv.size(), 2);

    // Add another one into existing list
    add_object(val1);
    tv = table->where().equal(col, val1).find_all();
    CHECK_EQUAL(tv.size(), 2);
    CHECK_EQUAL(tv.get_object(0).get_any(col), val1);
    CHECK_EQUAL(tv.get_object(1).get_any(col), val1);
}
2✔
1893

1894
// Verify case_map() in both directions (second argument true: to upper case, false: to lower
// case) for accented letters, for text that contains multi-byte symbols, and for input that
// is cut off in the middle of a multi-byte sequence (which must yield no result).
TEST(Unicode_Casemap)
{
    std::string inp = "±ÀÁÂÃÄÅÆÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝß×÷";
    auto out = case_map(inp, false);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "±àáâãäåæèéêëìíîïñòóôõöøùúûüýß×÷");

        // Round trip back to upper case. This is done inside the guard because
        // dereferencing 'out' is only valid when the first conversion produced a value.
        out = case_map(*out, true);
        if (CHECK(out)) {
            CHECK_EQUAL(*out, inp);
        }
    }

    // Symbols outside the alphabet must pass through unchanged
    inp = "A very old house 🏠 is on 🔥, we have to save the 🦄";
    out = case_map(inp, true);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "A VERY OLD HOUSE 🏠 IS ON 🔥, WE HAVE TO SAVE THE 🦄");
    }

    StringData trailing_garbage(inp.data(), 19); // String terminated inside icon
    out = case_map(trailing_garbage, true);
    CHECK_NOT(out);

    inp = "rødgrød med fløde";
    out = case_map(inp, true);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "RØDGRØD MED FLØDE");
    }
    // Here the optional itself is passed on, so no unguarded dereference takes place
    out = case_map(out, false);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, inp);
    }
}
2✔
1926

1927
// Return a string of exactly 'length' characters, each picked uniformly at random from the
// digits and the lower and upper case ASCII letters. The random engine is thread local and
// seeded from std::random_device on first use in each thread.
static std::string random_string(std::string::size_type length)
{
    static const char alphabet[] = "0123456789"
                                   "abcdefghijklmnopqrstuvwxyz"
                                   "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    thread_local static std::mt19937 engine{std::random_device{}()};
    // sizeof includes the terminating null character, hence the highest valid index is sizeof - 2
    thread_local static std::uniform_int_distribution<std::string::size_type> index_dist(0, sizeof(alphabet) - 2);

    std::string result;
    result.reserve(length);

    for (std::string::size_type i = 0; i != length; ++i) {
        result += alphabet[index_dist(engine)];
    }

    return result;
}
60,000✔
1945

1946
// Fill a list-of-strings column with random strings and query it for one remembered value,
// first without and then with a search index on the column. Both runs are expected to find
// exactly one object. The commented-out lines can be enabled to time the two queries.
TEST(StringIndex_ListOfRandomStrings)
{
    using namespace std::chrono;

    SHARED_GROUP_TEST_PATH(path);
    auto db = DB::create(path);
    auto wt = db->start_write();

    auto t = wt->add_table_with_primary_key("foo", type_Int, "_id");
    ColKey col_codes = t->add_column_list(type_String, "codes");

    const size_t num_objects = 10000;
    const size_t strings_per_object = 3;
    const size_t remembered_object = 5000;

    // The first string of object 'remembered_object' is the value searched for below
    std::string some_string;

    for (size_t i = 0; i != num_objects; ++i) {
        auto obj = t->create_object_with_primary_key(int64_t(i));
        auto list = obj.get_list<String>(col_codes);
        for (size_t j = 0; j != strings_per_object; ++j) {
            std::string str(random_string(14));
            const bool remember = (i == remembered_object) && (j == 0);
            if (remember) {
                some_string = str;
            }
            list.add(StringData(str));
        }
    }

    std::vector<Mixed> arguments{Mixed(some_string)};
    auto q = wt->get_table("foo")->query("codes = $0", arguments);

    // First run: the column has no search index yet
    // auto t1 = steady_clock::now();
    auto tv = q.find_all();
    // auto t2 = steady_clock::now();
    // std::cout << "time without index: " << duration_cast<microseconds>(t2 - t1).count() << " us" << std::endl;
    CHECK_EQUAL(tv.size(), 1);

    // Second run: same query after the index has been added
    t->add_search_index(col_codes);

    // t1 = steady_clock::now();
    tv = q.find_all();
    // t2 = steady_clock::now();
    // std::cout << "time with index: " << duration_cast<microseconds>(t2 - t1).count() << " us" << std::endl;
    CHECK_EQUAL(tv.size(), 1);

    // The index is requested once more on the already indexed column
    t->add_search_index(col_codes);

    // std::cout << tv.get_object(0).get<Int>("_id") << std::endl;
}
2✔
1988

1989
// Check equality queries on a nullable list-of-strings column while its lists are modified
// (set, remove, clear, insert) and while an object is removed. The test is instantiated
// twice: with a search index on the column (std::true_type) and without (std::false_type).
TEST_TYPES(StringIndex_ListOfStrings, std::true_type, std::false_type)
{
    constexpr bool add_index = TEST_TYPE::value;
    Group g;

    auto t = g.add_table("foo");
    ColKey col = t->add_column_list(type_String, "names", true);
    if constexpr (add_index) {
        t->add_search_index(col);
    }

    auto obj1 = t->create_object();
    auto obj2 = t->create_object();
    auto obj3 = t->create_object();

    // obj2 and obj3 both hold {"Johnny", "John"}
    for (Obj* obj : {&obj2, &obj3}) {
        auto list = obj->get_list<String>(col);
        list.add("Johnny");
        list.add("John");
    }

    // obj1 holds the same two names plus "Ivan" twice and a null string
    auto list = obj1.get_list<String>(col);
    list.add("Johnny");
    list.add("John");
    list.add("Ivan");
    list.add("Ivan");
    list.add(StringData());

    CHECK_EQUAL(t->query(R"(names = "John")").count(), 3);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 3);
    CHECK_EQUAL(t->query(R"(names = NULL)").count(), 1);

    // Overwrite "Johnny" in obj1
    list.set(0, "Paul");
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 3);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Paul")").count(), 1);

    // Remove "John" from obj1; an object holding "Ivan" twice is still counted once
    list.remove(1);
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Paul")").count(), 1);
    CHECK_EQUAL(t->query(R"(names = "Ivan")").count(), 1);

    // Empty the list of obj1
    list.clear();
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Paul")").count(), 0);

    // Insert duplicates into obj2 and then remove the whole object
    list = obj2.get_list<String>(col);
    list.insert(0, "Adam");
    list.insert(0, "Adam");
    obj2.remove();
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 1);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 1);

    std::string long1 = std::string(StringIndex::s_max_offset, 'a');
    std::string long2 = long1 + "b";

    list = obj1.get_list<String>(col);
    list.add(long1);
    // With an index, adding the string that is one character longer is expected to throw
    // (presumably because it is too long to be indexed - confirm against StringIndex).
    // 'add_index' is a compile-time constant, so the same 'if constexpr' form is used as
    // where the index is created above.
    if constexpr (add_index) {
        CHECK_THROW_ANY(list.add(long2));
    }
}
4✔
2053

2054
#endif // TEST_INDEX_STRING
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc