• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / 2555

09 Aug 2024 06:49PM UTC coverage: 91.123% (+0.04%) from 91.087%
2555

push

Evergreen

web-flow
Actually check for unuploaded changes in no_pending_local_changes() (#7967)

We can have local changesets stored which have already been uploaded and
acknowledged by the server, so checking all of the changesets is incorrect. We
need to instead only check changesets for versions after the current position
of the upload cursor.

102818 of 181588 branches covered (56.62%)

37 of 38 new or added lines in 2 files covered. (97.37%)

42 existing lines in 9 files now uncovered.

217381 of 238557 relevant lines covered (91.12%)

5684689.85 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.48
/test/test_index_string.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include "testsettings.hpp"
20
#ifdef TEST_INDEX_STRING
21

22
#include <realm.hpp>
23
#include <realm/index_string.hpp>
24
#include <realm/query_expression.hpp>
25
#include <realm/tokenizer.hpp>
26
#include <realm/util/to_string.hpp>
27
#include <set>
28
#include "test.hpp"
29
#include "util/misc.hpp"
30
#include "util/random.hpp"
31

32
using namespace realm;
33
using namespace util;
34
using namespace realm;
35
using namespace realm::util;
36
using namespace realm::test_util;
37
using unit_test::TestContext;
38

39
// Test independence and thread-safety
40
// -----------------------------------
41
//
42
// All tests must be thread safe and independent of each other. This
43
// is required because it allows for both shuffling of the execution
44
// order and for parallelized testing.
45
//
46
// In particular, avoid using std::rand() since it is not guaranteed
47
// to be thread safe. Instead use the API offered in
48
// `test/util/random.hpp`.
49
//
50
// All files created in tests must use the TEST_PATH macro (or one of
51
// its friends) to obtain a suitable file system path. See
52
// `test/util/test_path.hpp`.
53
//
54
//
55
// Debugging and the ONLY() macro
56
// ------------------------------
57
//
58
// A simple way of disabling all tests except one called `Foo`, is to
59
// replace TEST(Foo) with ONLY(Foo) and then recompile and rerun the
60
// test suite. Note that you can also use filtering by setting the
61
// environment variable `UNITTEST_FILTER`. See `README.md` for more on
62
// this.
63
//
64
// Another way to debug a particular test, is to copy that test into
65
// `experiments/testcase.cpp` and then run `sh build.sh
66
// check-testcase` (or one of its friends) from the command line.
67

68

69
namespace {
70

71
template <typename T>
72
class column {
73
public:
74
    class ColumnTestType {
75
    public:
76
        ColumnTestType(column* owner)
77
            : m_owner(owner)
88✔
78
        {
176✔
79
        }
176✔
80
        const SearchIndex* create_search_index()
81
        {
178✔
82
            m_owner->m_table.add_search_index(m_owner->m_col_key);
178✔
83
            return m_owner->m_table.get_search_index(m_owner->m_col_key);
178✔
84
        }
178✔
85
        ObjKey key(size_t ndx) const
86
        {
176✔
87
            return m_keys[ndx];
176✔
88
        }
176✔
89
        size_t size() const
90
        {
294✔
91
            return m_keys.size();
294✔
92
        }
294✔
93
        void add(T value)
94
        {
79,638✔
95
            auto k = m_owner->m_table.create_object().set(m_owner->m_col_key, value).get_key();
79,638✔
96
            m_keys.push_back(k);
79,638✔
97
        }
79,638✔
98
        void add_null()
99
        {
2✔
100
            auto k = m_owner->m_table.create_object().set_null(m_owner->m_col_key).get_key();
2✔
101
            m_keys.push_back(k);
2✔
102
        }
2✔
103
        void set(size_t ndx, T value)
104
        {
44✔
105
            m_owner->m_table.get_object(m_keys[ndx]).set(m_owner->m_col_key, value);
44✔
106
        }
44✔
107
        void insert(size_t ndx, T value)
108
        {
10✔
109
            auto k = m_owner->m_table.create_object().set(m_owner->m_col_key, value).get_key();
10✔
110
            m_keys.insert(m_keys.begin() + ndx, k);
10✔
111
        }
10✔
112
        T get(size_t ndx)
113
        {
2,150,282✔
114
            return m_owner->m_table.get_object(m_keys[ndx]).template get<T>(m_owner->m_col_key);
2,150,282✔
115
        }
2,150,282✔
116
        T get(ObjKey obj_key)
117
        {
152✔
118
            return m_owner->m_table.get_object(obj_key).template get<T>(m_owner->m_col_key);
152✔
119
        }
152✔
120
        void erase(size_t ndx)
121
        {
186✔
122
            m_owner->m_table.remove_object(m_keys[ndx]);
186✔
123
            m_keys.erase(m_keys.begin() + ndx);
186✔
124
        }
186✔
125
        void clear()
126
        {
162✔
127
            m_owner->m_table.clear();
162✔
128
            m_keys.clear();
162✔
129
        }
162✔
130
        size_t find_first(T value) const
131
        {
2,824✔
132
            auto k = m_owner->m_table.find_first(m_owner->m_col_key, value);
2,824✔
133
            if (k == realm::null_key) {
2,824✔
134
                return realm::npos;
1,244✔
135
            }
1,244✔
136
            auto it = std::find(m_keys.begin(), m_keys.end(), k);
1,580✔
137
            return it - m_keys.begin();
1,580✔
138
        }
2,824✔
139
        size_t count(T value) const
140
        {
160✔
141
            return m_owner->m_table.count_string(m_owner->m_col_key, value);
160✔
142
        }
160✔
143
        void verify()
144
        {
24✔
145
            m_owner->m_table.verify();
24✔
146
        }
24✔
147

148
    private:
149
        column* m_owner;
150
        std::vector<ObjKey> m_keys;
151
    };
152

153
    column(bool nullable = false, bool enumerated = false)
154
        : m_column(this)
88✔
155
    {
176✔
156
        m_col_key = m_table.add_column(ColumnTypeTraits<T>::id, "values", nullable);
176✔
157
        if (enumerated) {
176✔
158
            m_table.enumerate_string_column(m_col_key);
78✔
159
        }
78✔
160
    }
176✔
161
    ColumnTestType& get_column()
162
    {
176✔
163
        return m_column;
176✔
164
    }
176✔
165

166
private:
167
    Table m_table;
168
    ColKey m_col_key;
169
    ColumnTestType m_column;
170
};
171

172
// Fixture variant: string column that is neither nullable nor enumerated.
class string_column : public column<String> {
public:
    string_column()
        : column<String>(/* nullable */ false, /* enumerated */ false)
    {
    }

    // Properties of this variant, queryable without an instance.
    static bool is_nullable() { return false; }
    static bool is_enumerated() { return false; }
};
187
// Fixture variant: nullable string column that is not enumerated.
class nullable_string_column : public column<String> {
public:
    nullable_string_column()
        : column<String>(/* nullable */ true, /* enumerated */ false)
    {
    }

    // Properties of this variant, queryable without an instance.
    static bool is_nullable() { return true; }
    static bool is_enumerated() { return false; }
};
202
// Fixture variant: non-nullable string column that is enumerated.
class enum_column : public column<String> {
public:
    enum_column()
        : column<String>(/* nullable */ false, /* enumerated */ true)
    {
    }

    // Properties of this variant, queryable without an instance.
    static bool is_nullable() { return false; }
    static bool is_enumerated() { return true; }
};
217
// Fixture variant: string column that is both nullable and enumerated.
class nullable_enum_column : public column<String> {
public:
    nullable_enum_column()
        : column<String>(/* nullable */ true, /* enumerated */ true)
    {
    }

    // Properties of this variant, queryable without an instance.
    static bool is_nullable() { return true; }
    static bool is_enumerated() { return true; }
};
232

233
// disable to avoid warnings about not being used - enable when tests
234
// that need them are enabled again
235

236
// strings used by tests
// (s5 and s6 start with s1, and s3 starts with s7, so the set contains
// values with common prefixes)
const char s1[] = "John";
const char s2[] = "Brian";
const char s3[] = "Samantha";
const char s4[] = "Tom";
const char s5[] = "Johnathan";
const char s6[] = "Johnny";
const char s7[] = "Sam";

// integers used by integer index tests
// (0x11112227 appears twice on purpose, giving one duplicated value)
//
// Declared const: this vector is shared by several tests, and the tests in
// this file are required to be thread safe, so it must never be mutated.
const std::vector<int64_t> ints = {0x1111,     0x11112222, 0x11113333, 0x1111333, 0x111122223333ull, 0x1111222233334ull,
                                   0x22223333, 0x11112227, 0x11112227, 0x78923};
248

249
using nullable = std::true_type;
250
using non_nullable = std::false_type;
251

252
} // anonymous namespace
253

254
// Exercises the tokenizer on plain ASCII text, on text containing a
// multi-byte character and on hyphenated input.
TEST(Tokenizer_Basic)
{
    auto tok = realm::Tokenizer::get_instance();

    // Six words, four distinct tokens
    tok->reset("to be or not to be");
    auto distinct_tokens = tok->get_all_tokens();
    CHECK_EQUAL(distinct_tokens.size(), 4);

    // Same sentence with a capitalised first word: "to" is looked up in
    // lower case and is expected at word positions 0 and 4
    tok->reset("To be or not to be");
    realm::TokenInfoMap english = tok->get_token_info();
    CHECK_EQUAL(english.size(), 4);
    realm::TokenInfo& to_info = english["to"];
    CHECK_EQUAL(to_info.positions.size(), 2);
    CHECK_EQUAL(to_info.positions[0], 0);
    CHECK_EQUAL(to_info.positions[1], 4);
    CHECK_EQUAL(to_info.ranges.size(), 2);
    CHECK_EQUAL(to_info.ranges[0].first, 0);
    CHECK_EQUAL(to_info.ranges[0].second, 2);
    CHECK_EQUAL(to_info.ranges[1].first, 13);
    CHECK_EQUAL(to_info.ranges[1].second, 15);

    // The range of "sø" is expected to be 17..20
    tok->reset("Jeg gik mig over sø og land");
    realm::TokenInfoMap danish = tok->get_token_info();
    CHECK_EQUAL(danish.size(), 7);
    realm::TokenInfo& soe_info = danish["sø"];
    CHECK_EQUAL(soe_info.ranges[0].first, 17);
    CHECK_EQUAL(soe_info.ranges[0].second, 20);

    // Hyphens separate tokens; "term" occurs twice but is reported once
    tok->reset("with-hyphen -term -other-term-plus");
    const std::set<std::string> expected{"with", "hyphen", "term", "other", "plus"};
    CHECK(tok->get_all_tokens() == expected);
}
2✔
285

286
// Adding a search index to a link, link list, double, float or binary column
// is expected to fail with ErrorCodes::IllegalOperation.
TEST(StringIndex_NonIndexable)
{
    Group group;
    TableRef origin = group.add_table("table");
    TableRef target = group.add_table("target");

    // One column per type under test
    origin->add_column(*target, "link");
    origin->add_column_list(*target, "linkList");
    origin->add_column(type_Double, "double");
    origin->add_column(type_Float, "float");
    origin->add_column(type_Binary, "binary");

    for (auto col_key : origin->get_column_keys()) {
        CHECK_LOGIC_ERROR(origin->add_search_index(col_key), ErrorCodes::IllegalOperation);
    }
}
2✔
302

303
// Builds an index over a column that already holds data and checks that each
// value resolves to the key of the first object holding it.
TEST_TYPES(StringIndex_BuildIndex, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // s1 is added twice (duplicate value); s5 and s6 share a prefix with s1
    for (const char* str : {s1, s2, s3, s4, s1, s5, s6}) {
        col.add(str);
    }

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    // Look everything up first, then compare against the expected key values
    const ObjKey key_s1 = ndx.find_first(s1);
    const ObjKey key_s2 = ndx.find_first(s2);
    const ObjKey key_s3 = ndx.find_first(s3);
    const ObjKey key_s4 = ndx.find_first(s4);
    const ObjKey key_s5 = ndx.find_first(s5);
    const ObjKey key_s6 = ndx.find_first(s6);

    CHECK_EQUAL(0, key_s1.value);
    CHECK_EQUAL(1, key_s2.value);
    CHECK_EQUAL(2, key_s3.value);
    CHECK_EQUAL(3, key_s4.value);
    CHECK_EQUAL(5, key_s5.value);
    CHECK_EQUAL(6, key_s6.value);
}
8✔
333

334
// Removes every row twice - once back to front and once front to back - and
// checks that the index ends up empty both times.
TEST_TYPES(StringIndex_DeleteAll, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // s1 is added twice (duplicate value); s5 and s6 share a prefix with s1
    for (const char* str : {s1, s2, s3, s4, s1, s5, s6}) {
        col.add(str);
    }

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    // Delete all entries, last position first
    // (reverse order to avoid ref updates)
    for (size_t remaining = col.size(); remaining > 0; --remaining) {
        col.erase(remaining - 1);
    }
    CHECK(ndx.is_empty());

    // Re-insert the same values
    for (const char* str : {s1, s2, s3, s4, s1, s5, s6}) {
        col.add(str);
    }

    // Delete all entries, always taking the first position
    // (in order to force constant ref updating)
    while (col.size() > 0) {
        col.erase(0);
    }
    CHECK(ndx.is_empty());
}
8✔
381

382
// Removes rows one at a time and checks after each removal which values can
// still be found and at which position.
TEST_TYPES(StringIndex_Delete, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // Positions 0..4: s1, s2, s3, s4, s1 (s1 is a duplicate value)
    for (const char* str : {s1, s2, s3, s4, s1}) {
        col.add(str);
    }

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    // Delete first item (in index): position 1 holds s2
    col.erase(1);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(1, col.find_first(s3));
    CHECK_EQUAL(2, col.find_first(s4));
    CHECK_EQUAL(null_key, ndx.find_first(s2));

    // Delete last item (in index): position 2 now holds s4
    col.erase(2);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(1, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s4));
    CHECK_EQUAL(not_found, col.find_first(s2));

    // Delete middle item (in index): position 1 now holds s3
    col.erase(1);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(not_found, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s4));
    CHECK_EQUAL(not_found, col.find_first(s2));

    // Delete all items: only the two s1 rows are left
    for (int removed = 0; removed < 2; ++removed) {
        col.erase(0);
    }
    CHECK(ndx.is_empty());
}
8✔
425

426

427
// Clearing a column that never held any data must leave its index empty.
TEST_TYPES(StringIndex_ClearEmpty, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // Index an empty column ...
    const SearchIndex& ndx = *col.create_search_index();

    // ... and clear it straight away
    col.clear();
    CHECK(ndx.is_empty());
}
8✔
439

440
// Clears an indexed column holding data, refills it and checks that the
// index finds the newly created objects.
TEST_TYPES(StringIndex_Clear, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // s1 is added twice (duplicate value); s5 and s6 share a prefix with s1
    for (const char* str : {s1, s2, s3, s4, s1, s5, s6}) {
        col.add(str);
    }

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    // Clear to remove all entries
    col.clear();
    CHECK(ndx.is_empty());

    // Re-insert the same values
    for (const char* str : {s1, s2, s3, s4, s1, s5, s6}) {
        col.add(str);
    }

    // Look everything up first, then compare against the tracked keys; the
    // duplicated s1 must resolve to its first occurrence (position 0)
    const ObjKey key_s1 = ndx.find_first(s1);
    const ObjKey key_s2 = ndx.find_first(s2);
    const ObjKey key_s3 = ndx.find_first(s3);
    const ObjKey key_s4 = ndx.find_first(s4);
    const ObjKey key_s5 = ndx.find_first(s5);
    const ObjKey key_s6 = ndx.find_first(s6);

    CHECK_EQUAL(col.key(0), key_s1);
    CHECK_EQUAL(col.key(1), key_s2);
    CHECK_EQUAL(col.key(2), key_s3);
    CHECK_EQUAL(col.key(3), key_s4);
    CHECK_EQUAL(col.key(5), key_s5);
    CHECK_EQUAL(col.key(6), key_s6);
}
8✔
483

484

485
// Overwrites values at the first, last and middle position of an indexed
// column and checks after each change what can be found where.
TEST_TYPES(StringIndex_Set, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // Positions 0..4: s1, s2, s3, s4, s1 (s1 is a duplicate value)
    for (const char* str : {s1, s2, s3, s4, s1}) {
        col.add(str);
    }

    // Create a new index on column
    col.create_search_index();

    // Set top value; the remaining s1 at position 4 becomes the first match
    col.set(0, s5);

    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s3));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s1));

    // Set bottom value; s1 is now gone completely
    col.set(4, s6);

    CHECK_EQUAL(not_found, col.find_first(s1));
    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s3));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s6));

    // Set middle value; s3 is replaced by its own prefix s7
    col.set(2, s7);

    CHECK_EQUAL(not_found, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s1));
    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s7));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s6));
}
8✔
529

530
// Counts occurrences of values that appear 0, 1, 2, 3 and 4 times in an
// indexed column.
TEST_TYPES(StringIndex_Count, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // One s1, two s2, three s3 and four s4; s5 is never added
    for (const char* str : {s1, s2, s2, s3, s3, s3, s4, s4, s4, s4}) {
        col.add(str);
    }

    // Create a new index on column
    col.create_search_index();

    // Take all counts first, then compare
    const size_t count_s5 = col.count(s5);
    const size_t count_s1 = col.count(s1);
    const size_t count_s2 = col.count(s2);
    const size_t count_s3 = col.count(s3);
    const size_t count_s4 = col.count(s4);

    CHECK_EQUAL(0, count_s5);
    CHECK_EQUAL(1, count_s1);
    CHECK_EQUAL(2, count_s2);
    CHECK_EQUAL(3, count_s3);
    CHECK_EQUAL(4, count_s4);
}
8✔
561

562
// An index over a column with repeated values must report that it has
// duplicates.
TEST_TYPES(StringIndex_Distinct, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // One s1, two s2, three s3 and four s4
    for (const char* str : {s1, s2, s2, s3, s3, s3, s4, s4, s4, s4}) {
        col.add(str);
    }

    // Create a new index on column
    const SearchIndex* ndx = col.create_search_index();
    CHECK(ndx->has_duplicate_values());
}
8✔
582

583
// Checks the three possible outcomes of find_all_no_copy(): value absent,
// value present once, and value present several times.
TEST_TYPES(StringIndex_FindAllNoCopy, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // One s1, two s2, three s3 and four s4; the s4 rows sit at positions 6..9
    for (const char* str : {s1, s2, s2, s3, s3, s3, s4, s4, s4, s4}) {
        col.add(str);
    }

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    InternalFindResult found;

    // Value that was never added
    FindRes absent = ndx.find_all_no_copy(StringData("not there"), found);
    CHECK_EQUAL(FindRes_not_found, absent);

    // Value held by exactly one object: the payload is compared against 0
    FindRes unique = ndx.find_all_no_copy(s1, found);
    CHECK_EQUAL(FindRes_single, unique);
    CHECK_EQUAL(0, found.payload);

    // Value held by four objects: the payload is used as the ref of a list
    // of object keys
    FindRes repeated = ndx.find_all_no_copy(s4, found);
    CHECK_EQUAL(FindRes_column, repeated);
    BPlusTree<ObjKey> matches(Allocator::get_default());
    matches.init_from_ref(ref_type(found.payload));

    CHECK_EQUAL(4, found.end_ndx - found.start_ndx);
    CHECK_EQUAL(4, matches.size());
    for (size_t n = 0; n < 4; ++n) {
        CHECK_EQUAL(col.key(6 + n), matches.get(n));
    }
}
8✔
622

623
// If a column contains a specific value in multiple rows, then the index will store a list of these row numbers
624
// in form of a column. If you call find_all() on an index, it will return a *reference* to that column instead
625
// of copying it to you, as a performance optimization.
626
// For every value in `ints`, checks that find_all_no_copy() reports a single
// match for unique values and a list of matches for duplicated ones.
TEST(StringIndex_FindAllNoCopy2_Int)
{
    // Create a column with duplicate values
    column<Int> test_resources;
    // Bind by reference: plain `auto` would copy the facade together with
    // its list of tracked keys
    auto& col = test_resources.get_column();

    for (auto i : ints)
        col.add(i);

    // Create a new index on column (once is enough)
    const SearchIndex& ndx = *col.create_search_index();
    InternalFindResult results;

    for (auto i : ints) {
        FindRes res = ndx.find_all_no_copy(i, results);

        // Number of times `i` occurs in the input
        size_t real = 0;
        for (auto j : ints) {
            if (i == j)
                real++;
        }

        if (real == 1) {
            // The payload is used as a position into `ints`
            CHECK_EQUAL(res, FindRes_single);
            CHECK_EQUAL(i, ints[size_t(results.payload)]);
        }
        else if (real > 1) {
            // The payload is used as the ref of a list of positions
            CHECK_EQUAL(FindRes_column, res);
            const IntegerColumn results_column(Allocator::get_default(), ref_type(results.payload));
            CHECK_EQUAL(real, results.end_ndx - results.start_ndx);
            CHECK_EQUAL(real, results_column.size());
            for (size_t y = 0; y < real; y++)
                CHECK_EQUAL(i, ints[size_t(results_column.get(y))]);
        }
    }
}
2✔
663

664
// If a column contains a specific value in multiple rows, then the index will store a list of these row numbers
665
// in form of a column. If you call find_all() on an index, it will return a *reference* to that column instead
666
// of copying it to you, as a performance optimization.
667
// Same as StringIndex_FindAllNoCopy2_Int, but on a nullable column that
// additionally holds one null value at the end.
TEST(StringIndex_FindAllNoCopy2_IntNull)
{
    // Create a column with duplicate values
    column<Int> test_resources(true);
    // Bind by reference: plain `auto` would copy the facade together with
    // its list of tracked keys
    auto& col = test_resources.get_column();

    // `ints` is a std::vector, so it has to be iterated through its own
    // size. The former `sizeof(ints) / sizeof(ints[0])` bound measured the
    // vector object itself and therefore only covered the first few values,
    // which left the duplicate-value branch below unexercised.
    for (auto i : ints)
        col.add(i);
    col.add_null();

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();
    InternalFindResult results;

    for (auto i : ints) {
        FindRes res = ndx.find_all_no_copy(i, results);

        // Number of times `i` occurs in the input
        size_t real = 0;
        for (auto j : ints) {
            if (i == j)
                real++;
        }

        if (real == 1) {
            // The payload is used as a position into `ints`
            CHECK_EQUAL(res, FindRes_single);
            CHECK_EQUAL(i, ints[size_t(results.payload)]);
        }
        else if (real > 1) {
            // The payload is used as the ref of a list of positions
            CHECK_EQUAL(FindRes_column, res);
            const IntegerColumn results2(Allocator::get_default(), ref_type(results.payload));
            CHECK_EQUAL(real, results.end_ndx - results.start_ndx);
            CHECK_EQUAL(real, results2.size());
            for (size_t y = 0; y < real; y++)
                CHECK_EQUAL(i, ints[size_t(results2.get(y))]);
        }
    }

    // The single null was added last, so it is expected at the last position
    FindRes res = ndx.find_all_no_copy(null{}, results);
    CHECK_EQUAL(FindRes_single, res);
    CHECK_EQUAL(results.payload, col.size() - 1);
}
2✔
708

709
// Runs find_all_no_copy() on strings that differ only in their final
// character, using common prefixes whose length is equal to, one more than,
// and half of StringIndex::s_max_offset.
TEST_TYPES(StringIndex_FindAllNoCopyCommonPrefixStrings, string_column, nullable_string_column, enum_column,
           nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();
    const SearchIndex& ndx = *col.create_search_index();

    // Adds prefix+"b" once, prefix+"c" twice and prefix+"e" three times
    // (prefix+"d" is never added) and verifies what the index returns for
    // each of the four strings.
    auto test_prefix_find = [&](std::string prefix) {
        // The std::string objects own the bytes the StringData views refer to
        std::string owned_b = prefix + "b";
        std::string owned_c = prefix + "c";
        std::string owned_d = prefix + "d";
        std::string owned_e = prefix + "e";
        StringData once(owned_b);
        StringData twice(owned_c);
        StringData never(owned_d);
        StringData thrice(owned_e);

        // Rows added by this call start at this position
        size_t first_row = col.size();
        for (const StringData& str : {once, twice, twice, thrice, thrice, thrice}) {
            col.add(str);
        }

        InternalFindResult found;

        // Single occurrence
        FindRes outcome = ndx.find_all_no_copy(once, found);
        CHECK_EQUAL(outcome, FindRes_single);
        CHECK_EQUAL(found.payload, first_row);

        // Two occurrences, expected at rows first_row+1 and first_row+2
        outcome = ndx.find_all_no_copy(twice, found);
        CHECK_EQUAL(outcome, FindRes_column);
        CHECK_EQUAL(found.end_ndx - found.start_ndx, 2);
        const IntegerColumn rows_twice(Allocator::get_default(), ref_type(found.payload));
        for (size_t n = 0; n < 2; ++n) {
            CHECK_EQUAL(rows_twice.get(found.start_ndx + n), first_row + 1 + n);
        }
        for (size_t n = 0; n < 2; ++n) {
            CHECK_EQUAL(col.get(size_t(rows_twice.get(found.start_ndx + n))), twice);
        }

        // No occurrence
        outcome = ndx.find_all_no_copy(never, found);
        CHECK_EQUAL(outcome, FindRes_not_found);

        // Three occurrences, expected at rows first_row+3 .. first_row+5
        outcome = ndx.find_all_no_copy(thrice, found);
        CHECK_EQUAL(outcome, FindRes_column);
        CHECK_EQUAL(found.end_ndx - found.start_ndx, 3);
        const IntegerColumn rows_thrice(Allocator::get_default(), ref_type(found.payload));
        for (size_t n = 0; n < 3; ++n) {
            CHECK_EQUAL(rows_thrice.get(found.start_ndx + n), first_row + 3 + n);
        }
        for (size_t n = 0; n < 3; ++n) {
            CHECK_EQUAL(col.get(size_t(rows_thrice.get(found.start_ndx + n))), thrice);
        }
    };

    // Prefixes made of 'a' only, sized relative to StringIndex::s_max_offset
    std::string at_limit(StringIndex::s_max_offset, 'a');
    std::string above_limit = at_limit + "a";
    std::string below_limit(StringIndex::s_max_offset >> 1, 'a');

    test_prefix_find(at_limit);
    test_prefix_find(above_limit);
    test_prefix_find(below_limit);
}
8✔
771

772
// For every value in `ints`, the count reported by the index must equal the
// number of times the value occurs in `ints`.
TEST(StringIndex_Count_Int)
{
    // Create a column with duplicate values
    column<Int> fixture;
    auto col = fixture.get_column();

    for (auto value : ints) {
        col.add(value);
    }

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    for (auto probe : ints) {
        size_t reported = ndx.count(probe);

        // Reference count obtained by scanning the input
        size_t expected = 0;
        for (auto candidate : ints) {
            expected += (probe == candidate) ? 1 : 0;
        }

        CHECK_EQUAL(expected, reported);
    }
}
2✔
796

797

798
// An index over an integer column with repeated values must report that it
// has duplicates.
TEST(StringIndex_Distinct_Int)
{
    // Create a column with duplicate values
    column<Int> fixture;
    auto col = fixture.get_column();

    for (auto value : ints) {
        col.add(value);
    }

    // Create a new index on column
    const SearchIndex* ndx = col.create_search_index();
    CHECK(ndx->has_duplicate_values());
}
2✔
811

812

813
// Follows the first match for a duplicated integer through set, erase,
// insert, add and clear operations on an indexed column.
TEST(StringIndex_Set_Add_Erase_Insert_Int)
{
    column<Int> fixture;
    auto col = fixture.get_column();

    // Positions 0..3: 1, 2, 3, 2
    for (int value : {1, 2, 3, 2}) {
        col.add(value);
    }

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    // The first 2 sits at position 1
    ObjKey hit = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(1), hit);

    // Overwrite it: the remaining 2 sits at position 3
    col.set(1, 5);

    hit = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(3), hit);

    // Remove position 1: the 2 moves down to position 2
    col.erase(1);

    hit = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(2), hit);

    // Insert at position 1: the 2 moves back up to position 3
    col.insert(1, 5);
    CHECK_EQUAL(col.get(1), 5);

    hit = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(3), hit);

    // Append a value and then overwrite it
    col.add(7);
    CHECK_EQUAL(col.get(4), 7);
    col.set(4, 10);
    CHECK_EQUAL(col.get(4), 10);

    hit = ndx.find_first(int64_t(10));
    CHECK_EQUAL(col.key(col.size() - 1), hit);

    // Append one more value and look it up
    col.add(9);
    hit = ndx.find_first(int64_t(9));
    CHECK_EQUAL(col.key(col.size() - 1), hit);

    // After clearing, nothing may be found any more
    col.clear();
    hit = ndx.find_first(int64_t(2));
    CHECK_EQUAL(null_key, hit);
}
2✔
861

862
// Fills an indexed integer column with random values and compares the result
// of find_first() with a linear scan, both for values taken from the column
// and for freshly drawn random values.
TEST(StringIndex_FuzzyTest_Int)
{
    column<Int> test_resources;
    // Bind by reference: plain `auto` would copy the facade together with
    // its list of tracked keys
    auto& col = test_resources.get_column();
    Random random(random_int<unsigned long>());
    const size_t n = static_cast<size_t>(1.2 * REALM_MAX_BPNODE_SIZE);

    // The index exists before any data is added
    col.create_search_index();

    for (size_t t = 0; t < n; ++t) {
        col.add(random.draw_int_max(0xffffffffffffffff));
    }

    for (size_t t = 0; t < n; ++t) {
        // Probe either with a value known to be present or with a random one
        int64_t r;
        if (random.draw_bool())
            r = col.get(t);
        else
            r = random.draw_int_max(0xffffffffffffffff);

        size_t m = col.find_first(r);

        // Reference result: first position holding `r` according to a scan
        bool present = false;
        for (size_t t_2 = 0; t_2 < n; ++t_2) {
            if (col.get(t_2) == r) {
                CHECK_EQUAL(t_2, m);
                present = true;
                break;
            }
        }

        // A value the scan cannot find must not be found via the index
        // either; previously this outcome went unchecked.
        if (!present) {
            CHECK_EQUAL(not_found, m);
        }
    }
}
2✔
891

892
namespace {
893

894
// Generate string where the bit pattern in bits is converted to NUL bytes. E.g. (length=2):
895
// bits=0 -> "\0\0", bits=1 -> "\x\0", bits=2 -> "\0\x", bits=3 -> "\x\x", where x is a random byte
896
StringData create_string_with_nuls(const size_t bits, const size_t length, char* tmp, Random& random)
897
{
×
898
    for (size_t i = 0; i < length; ++i) {
×
899
        bool insert_nul_at_pos = (bits & (size_t(1) << i)) == 0;
×
900
        if (insert_nul_at_pos) {
×
901
            tmp[i] = '\0';
×
902
        }
×
903
        else {
×
904
            // Avoid stray \0 chars, since we are already testing all combinations.
905
            // All casts are necessary to preserve the bitpattern.
906
            tmp[i] = static_cast<char>(static_cast<unsigned char>(random.draw_int<unsigned int>(1, UCHAR_MAX)));
×
907
        }
×
908
    }
×
909
    return StringData(tmp, length);
×
910
}
×
911

912
} // anonymous namespace
913

914

915
// Test for generated strings of length 1..16 with all combinations of embedded NUL bytes
916
TEST_TYPES_IF(StringIndex_EmbeddedZeroesCombinations, TEST_DURATION > 1, string_column, nullable_string_column)
917
{
×
918
    TEST_TYPE test_resources;
×
919
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
×
920
    const SearchIndex& ndx = *col.create_search_index();
×
921

922
    constexpr unsigned int seed = 42;
×
923
    const size_t MAX_LENGTH = 16; // Test medium
×
924
    char tmp[MAX_LENGTH];         // this is a bit of a hack, that relies on the string being copied in column.add()
×
925

926
    for (size_t length = 1; length <= MAX_LENGTH; ++length) {
×
927

928
        {
×
929
            Random random(seed);
×
930
            const size_t combinations = size_t(1) << length;
×
931
            for (size_t i = 0; i < combinations; ++i) {
×
932
                StringData str = create_string_with_nuls(i, length, tmp, random);
×
933
                col.add(str);
×
934
            }
×
935
        }
×
936

937
        // check index up to this length
938
        size_t expected_index = 0;
×
939
        for (size_t l = 1; l <= length; ++l) {
×
940
            Random random(seed);
×
941
            const size_t combinations = size_t(1) << l;
×
942
            for (size_t i = 0; i < combinations; ++i) {
×
943
                StringData needle = create_string_with_nuls(i, l, tmp, random);
×
944
                CHECK_EQUAL(ndx.find_first(needle), col.key(expected_index));
×
945
                CHECK(strncmp(col.get(expected_index).data(), needle.data(), l) == 0);
×
946
                CHECK_EQUAL(col.get(expected_index).size(), needle.size());
×
947
                expected_index++;
×
948
            }
×
949
        }
×
950
    }
×
951
}
×
952

953
// Tests for a bug with strings containing zeroes
954
TEST_TYPES(StringIndex_EmbeddedZeroes, string_column, nullable_string_column, enum_column, nullable_enum_column)
955
{
8✔
956
    TEST_TYPE test_resources;
8✔
957
    typename TEST_TYPE::ColumnTestType& col2 = test_resources.get_column();
8✔
958
    const SearchIndex& ndx2 = *col2.create_search_index();
8✔
959

960
    // FIXME: re-enable once embedded nuls work
961
    col2.add(StringData("\0", 1));
8✔
962
    col2.add(StringData("\1", 1));
8✔
963
    col2.add(StringData("\0\0", 2));
8✔
964
    col2.add(StringData("\0\1", 2));
8✔
965
    col2.add(StringData("\1\0", 2));
8✔
966

967
    CHECK_EQUAL(ndx2.find_first(StringData("\0", 1)), col2.key(0));
8✔
968
    CHECK_EQUAL(ndx2.find_first(StringData("\1", 1)), col2.key(1));
8✔
969
    CHECK_EQUAL(ndx2.find_first(StringData("\2", 1)), null_key);
8✔
970
    CHECK_EQUAL(ndx2.find_first(StringData("\0\0", 2)), col2.key(2));
8✔
971
    CHECK_EQUAL(ndx2.find_first(StringData("\0\1", 2)), col2.key(3));
8✔
972
    CHECK_EQUAL(ndx2.find_first(StringData("\1\0", 2)), col2.key(4));
8✔
973
    CHECK_EQUAL(ndx2.find_first(StringData("\1\0\0", 3)), null_key);
8✔
974

975
    // Integer index (uses String index internally)
976
    int64_t v = 1ULL << 41;
8✔
977
    column<Int> test_resources_1;
8✔
978
    auto col = test_resources_1.get_column();
8✔
979
    const SearchIndex& ndx = *col.create_search_index();
8✔
980
    col.add(1ULL << 40);
8✔
981
    auto f = ndx.find_first(v);
8✔
982
    CHECK_EQUAL(f, null_key);
8✔
983
}
8✔
984

985
TEST_TYPES(StringIndex_Null, nullable_string_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    // Row 0 holds an empty string and row 1 holds null; the index is created
    // only after both rows exist.
    col.add("");
    col.add(realm::null());

    const SearchIndex& ndx = *col.create_search_index();

    // A lookup for null must return the null row, not the empty-string row.
    CHECK_EQUAL(ndx.find_first(realm::null()), col.key(1));
}
4✔
998

999

1000
TEST_TYPES(StringIndex_Zero_Crash, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    // StringIndex could crash if strings ended with one or more 0-bytes
    Table table;
    auto col = table.add_column(type_String, "strings", TEST_TYPE::is_nullable());

    // Creates an object holding `value` and returns its key.
    const auto insert = [&](StringData value) {
        return table.create_object().set(col, value).get_key();
    };

    // The objects are created first; the search index is added afterwards.
    const auto key_empty = insert(StringData(""));
    const auto key_one_nul = insert(StringData("\0", 1));
    const auto key_two_nuls = insert(StringData("\0\0", 2));
    table.add_search_index(col);

    if (TEST_TYPE::is_enumerated()) {
        table.enumerate_string_column(col);
    }

    // Looks `needle` up in the table and compares the result with `expected`.
    const auto expect_lookup = [&](const auto& expected, StringData needle) {
        ObjKey found = table.find_first_string(col, needle);
        CHECK_EQUAL(expected, found);
    };

    expect_lookup(key_empty, StringData(""));
    expect_lookup(key_one_nul, StringData("\0", 1));
    expect_lookup(key_two_nuls, StringData("\0\0", 2));
}
8✔
1027

1028
TEST_TYPES(StringIndex_Zero_Crash2, std::true_type, std::false_type)
1029
{
4✔
1030
    Random random(random_int<unsigned long>());
4✔
1031

1032
    constexpr bool add_common_prefix = TEST_TYPE::value;
4✔
1033

1034
    for (size_t iter = 0; iter < 10 + TEST_DURATION * 100; iter++) {
44✔
1035
        // StringIndex could crash if strings ended with one or more 0-bytes
1036
        Table table;
40✔
1037
        auto col = table.add_column(type_String, "string", true);
40✔
1038

1039
        table.add_search_index(col);
40✔
1040

1041
        for (size_t i = 0; i < 100 + TEST_DURATION * 1000; i++) {
4,040✔
1042
            unsigned char action = static_cast<unsigned char>(random.draw_int_max<unsigned int>(100));
4,000✔
1043
            if (action == 0) {
4,000✔
1044
                table.remove_search_index(col);
31✔
1045
                table.add_search_index(col);
31✔
1046
            }
31✔
1047
            else if (action > 48 && table.size() < 10) {
3,969✔
1048
                // Generate string with equal probability of being empty, null, short, medium and long, and with
1049
                // their contents having equal proability of being either random or a duplicate of a previous
1050
                // string. When it's random, each char must have equal probability of being 0 or non-0e
1051
                static std::string buf =
1,986✔
1052
                    "This string is around 90 bytes long, which falls in the long-string type of Realm strings";
1,986✔
1053

1054
                std::string copy = buf;
1,986✔
1055

1056
                static std::string buf2 =
1,986✔
1057
                    "                                                                                         ";
1,986✔
1058
                std::string copy2 = buf2;
1,986✔
1059
                StringData sd;
1,986✔
1060

1061
                size_t len = random.draw_int_max<size_t>(3);
1,986✔
1062
                if (len == 0)
1,986✔
1063
                    len = 0;
482✔
1064
                else if (len == 1)
1,504✔
1065
                    len = 7;
496✔
1066
                else if (len == 2)
1,008✔
1067
                    len = 27;
500✔
1068
                else
508✔
1069
                    len = random.draw_int_max<size_t>(90);
508✔
1070

1071
                copy = copy.substr(0, len);
1,986✔
1072
                if (add_common_prefix) {
1,986✔
1073
                    std::string prefix(StringIndex::s_max_offset, 'a');
989✔
1074
                    copy = prefix + copy;
989✔
1075
                }
989✔
1076

1077
                if (random.draw_int_max<int>(1) == 0) {
1,986✔
1078
                    // duplicate string
1079
                    sd = StringData(copy);
987✔
1080
                }
987✔
1081
                else {
999✔
1082
                    // random string
1083
                    for (size_t t = 0; t < len; t++) {
20,415✔
1084
                        if (random.draw_int_max<int>(100) > 20)
19,416✔
1085
                            copy2[t] = 0; // zero byte
15,363✔
1086
                        else
4,053✔
1087
                            copy2[t] = static_cast<char>(random.draw_int<int>()); // random byte
4,053✔
1088
                    }
19,416✔
1089
                    // no generated string can equal "null" (our vector magic value for null) because
1090
                    // len == 4 is not possible
1091
                    copy2 = copy2.substr(0, len);
999✔
1092
                    if (add_common_prefix) {
999✔
1093
                        std::string prefix(StringIndex::s_max_offset, 'a');
505✔
1094
                        copy2 = prefix + copy2;
505✔
1095
                    }
505✔
1096
                    sd = StringData(copy2);
999✔
1097
                }
999✔
1098

1099
                bool done = false;
1,986✔
1100
                do {
1,986✔
1101
                    int64_t key_val = random.draw_int_max<int64_t>(10000);
1,986✔
1102
                    try {
1,986✔
1103
                        table.create_object(ObjKey(key_val)).set(col, sd);
1,986✔
1104
                        done = true;
1,986✔
1105
                    }
1,986✔
1106
                    catch (...) {
1,986✔
UNCOV
1107
                    }
×
1108
                } while (!done);
1,986✔
1109
                table.verify();
1,986✔
1110
            }
1,986✔
1111
            else if (table.size() > 0) {
1,983✔
1112
                // delete
1113
                size_t row = random.draw_int_max<size_t>(table.size() - 1);
1,763✔
1114
                Obj obj = table.get_object(row);
1,763✔
1115
                obj.remove();
1,763✔
1116
            }
1,763✔
1117

1118
            action = static_cast<unsigned char>(random.draw_int_max<unsigned int>(100));
4,000✔
1119
            if (table.size() > 0) {
4,000✔
1120
                // Search for value that exists
1121
                size_t row = random.draw_int_max<size_t>(table.size() - 1);
3,550✔
1122
                Obj obj = table.get_object(row);
3,550✔
1123
                StringData sd = obj.get<String>(col);
3,550✔
1124
                ObjKey t = table.find_first_string(col, sd);
3,550✔
1125
                StringData sd2 = table.get_object(t).get<String>(col);
3,550✔
1126
                CHECK_EQUAL(sd, sd2);
3,550✔
1127
            }
3,550✔
1128
        }
4,000✔
1129
    }
40✔
1130
}
4✔
1131

1132
TEST(StringIndex_Integer_Increasing)
{
    const size_t rows = 2000 + 1000000 * TEST_DURATION;

    // An indexed integer column is filled with random values. The counts the
    // table reports are then compared with a reference multiset.
    Table table;
    auto col = table.add_column(type_Int, "int");
    table.add_search_index(col);

    std::multiset<int64_t> reference;

    size_t inserted = 0;
    while (inserted < rows) {
        int64_t value = fastrand((TEST_DURATION == 0) ? 2000 : 0x100000);
        table.create_object().set(col, value);
        reference.insert(value);
        ++inserted;
    }

    // Every stored value must be counted as often as it was inserted.
    for (auto obj : table) {
        int64_t value = obj.get<Int>(col);
        size_t reported = table.count_int(col, value);
        size_t expected = reference.count(value);
        CHECK_EQUAL(reported, expected);
    }
}
2✔
1156

1157
TEST_TYPES(StringIndex_Duplicate_Values, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    // Four values are added before the index exists.
    col.add(s1);
    col.add(s2);
    col.add(s3);
    col.add(s4);

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();
    CHECK(!ndx.has_duplicate_values());

    // Adding s1 again creates a duplicate ...
    col.add(s1);
    CHECK(ndx.has_duplicate_values());

    // ... which disappears again when either copy is erased.
    col.erase(4);
    CHECK(!ndx.has_duplicate_values());
    col.add(s1);
    CHECK(ndx.has_duplicate_values());
    col.erase(0);
    CHECK(!ndx.has_duplicate_values());

    // check emptied set
    col.clear();
    CHECK(ndx.is_empty());
    CHECK(!ndx.has_duplicate_values());

    const size_t num_rows = 100;

    // The decimal strings "0" .. "99" are all distinct.
    for (size_t number = 0; number != num_rows; ++number) {
        std::string decimal(util::to_string(number));
        col.add(decimal);
    }
    CHECK(!ndx.has_duplicate_values());

    // Runs of 'a' with lengths 1 .. num_rows are all distinct as well.
    std::string a_run = "a";
    for (size_t added = 0; added != num_rows; ++added) {
        col.add(a_run);
        a_run += "a";
    }
    CHECK(!ndx.has_duplicate_values());

    // a_run is now one character longer than anything stored; adding it twice
    // creates a duplicate, and erasing the last row removes it again.
    col.add(a_run);
    col.add(a_run);
    CHECK(ndx.has_duplicate_values());
    col.erase(col.size() - 1);
    CHECK(!ndx.has_duplicate_values());

    // Insert into the middle unique value of num_rows
    std::string str_num_rows(util::to_string(num_rows));
    const size_t middle = num_rows / 2;
    col.insert(middle, str_num_rows);
    CHECK(!ndx.has_duplicate_values());

    // Set the next element to be num_rows too
    col.set(middle + 1, str_num_rows);
    CHECK(ndx.has_duplicate_values());

    col.clear();
    CHECK(!ndx.has_duplicate_values());
    CHECK(col.size() == 0);
}
8✔
1224

1225
TEST_TYPES(StringIndex_MaxBytes, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    // Strings of 'a' with lengths s_max_offset / 2, s_max_offset and
    // s_max_offset + 1.
    std::string std_max(StringIndex::s_max_offset, 'a');
    std::string std_over_max(std_max + "a");
    std::string std_under_max(StringIndex::s_max_offset >> 1, 'a');

    // Checked in this order for every duplicate count.
    const StringData probes[] = {
        StringData(std_under_max),
        StringData(std_max),
        StringData(std_over_max),
    };

    const SearchIndex& ndx = *col.create_search_index();

    CHECK_EQUAL(col.size(), 0);

    // Adds `copies` copies of `value` to the empty column, checks what the
    // column and index report, then clears the column again.
    auto duplicate_check = [&](size_t copies, StringData value) {
        CHECK(col.size() == 0);
        size_t added = 0;
        while (added < copies) {
            col.add(value);
            ++added;
        }
        CHECK_EQUAL(col.size(), copies);
        CHECK(ndx.has_duplicate_values() == (copies > 1));
        CHECK_EQUAL(col.get(0), value);
        CHECK_EQUAL(col.count(value), copies);
        CHECK_EQUAL(col.find_first(value), 0);
        col.clear();
    };

    // Duplicate counts, including values on both sides of REALM_MAX_BPNODE_SIZE.
    std::vector<size_t> num_duplicates_list = {
        1, 10, REALM_MAX_BPNODE_SIZE - 1, REALM_MAX_BPNODE_SIZE, REALM_MAX_BPNODE_SIZE + 1,
    };
    for (auto& dups : num_duplicates_list) {
        for (const StringData& probe : probes) {
            duplicate_check(dups, probe);
        }
    }
}
8✔
1263

1264

1265
// There is a corner case where two very long strings are
1266
// inserted into the string index which are identical except
1267
// for the characters at the end (they have an identical very
1268
// long prefix). This was causing a stack overflow because of
1269
// the recursive nature of the insert function.
1270
TEST_TYPES(StringIndex_InsertLongPrefix, string_column, nullable_string_column, enum_column, nullable_enum_column)
1271
{
8✔
1272
    TEST_TYPE test_resources;
8✔
1273
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1274
    const SearchIndex& ndx = *col.create_search_index();
8✔
1275

1276
    col.add("test_index_string1");
8✔
1277
    col.add("test_index_string2");
8✔
1278

1279
    CHECK_EQUAL(col.find_first("test_index_string1"), 0);
8✔
1280
    CHECK_EQUAL(col.find_first("test_index_string2"), 1);
8✔
1281

1282
    std::string std_base(107, 'a');
8✔
1283
    std::string std_base_b = std_base + "b";
8✔
1284
    std::string std_base_c = std_base + "c";
8✔
1285
    StringData base_b(std_base_b);
8✔
1286
    StringData base_c(std_base_c);
8✔
1287
    col.add(base_b);
8✔
1288
    ndx.verify();
8✔
1289
    col.add(base_c);
8✔
1290
    ndx.verify();
8✔
1291

1292
    CHECK_EQUAL(col.find_first(base_b), 2);
8✔
1293
    CHECK_EQUAL(col.find_first(base_c), 3);
8✔
1294

1295
    // To trigger the bug, the length must be more than 10000.
1296
    // Array::destroy_deep() will stack overflow at around recursion depths of
1297
    // lengths > 90000 on mac and less on android devices.
1298
    std::string std_base2(100000, 'a');
8✔
1299
    std::string std_base2_b = std_base2 + "b";
8✔
1300
    std::string std_base2_c = std_base2 + "c";
8✔
1301
    StringData base2(std_base2);
8✔
1302
    StringData base2_b(std_base2_b);
8✔
1303
    StringData base2_c(std_base2_c);
8✔
1304
    col.add(base2_b);
8✔
1305
    ndx.verify();
8✔
1306
    col.add(base2_c);
8✔
1307
    ndx.verify();
8✔
1308

1309
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1310
    CHECK_EQUAL(col.find_first(base2_c), 5);
8✔
1311

1312
    col.add(base2);
8✔
1313
    CHECK(!ndx.has_duplicate_values());
8✔
1314
    ndx.verify();
8✔
1315
    col.add(base2_b); // adds a duplicate in the middle of the list
8✔
1316

1317
    CHECK(ndx.has_duplicate_values());
8✔
1318
    std::vector<ObjKey> find_all_result;
8✔
1319
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1320
    ndx.find_all(find_all_result, base2_b);
8✔
1321
    CHECK_EQUAL(find_all_result.size(), 2);
8✔
1322
    CHECK_EQUAL(find_all_result[0], col.key(4));
8✔
1323
    CHECK_EQUAL(find_all_result[1], col.key(7));
8✔
1324
    find_all_result.clear();
8✔
1325
    CHECK_EQUAL(ndx.count(base2_b), 2);
8✔
1326
    col.verify();
8✔
1327

1328
    col.erase(7);
8✔
1329
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1330
    CHECK_EQUAL(ndx.count(base2_b), 1);
8✔
1331
    ndx.find_all(find_all_result, base2_b);
8✔
1332
    CHECK_EQUAL(find_all_result.size(), 1);
8✔
1333
    CHECK_EQUAL(find_all_result[0], col.key(4));
8✔
1334
    find_all_result.clear();
8✔
1335
    col.verify();
8✔
1336

1337
    col.set(6, base2_b);
8✔
1338
    CHECK_EQUAL(ndx.count(base2_b), 2);
8✔
1339
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1340
    ndx.find_all(find_all_result, base2_b);
8✔
1341
    CHECK_EQUAL(find_all_result.size(), 2);
8✔
1342
    CHECK_EQUAL(find_all_result[0], col.key(4));
8✔
1343
    CHECK_EQUAL(find_all_result[1], col.key(6));
8✔
1344
    col.verify();
8✔
1345

1346
    col.clear(); // calls recursive function Array::destroy_deep()
8✔
1347
}
8✔
1348

1349
TEST_TYPES(StringIndex_InsertLongPrefixAndQuery, string_column, nullable_string_column, enum_column,
           nullable_enum_column)
{
    constexpr int half_node_size = REALM_MAX_BPNODE_SIZE / 2;
    // Number of copies stored of each of the three main strings.
    constexpr int copies = half_node_size * 3;

    Group g;
    auto t = g.add_table("StringsOnly");
    auto col = t->add_column(type_String, "first", TEST_TYPE::is_nullable());
    t->add_search_index(col);

    // Every string starts with s_max_offset 'a' characters and differs only
    // in its last five characters.
    std::string base(StringIndex::s_max_offset, 'a');
    std::string str_a = base + "aaaaa";
    std::string str_a0 = base + "aaaa0";
    std::string str_ax = base + "aaaax";
    std::string str_b = base + "bbbbb";
    std::string str_c = base + "ccccc";

    // Never stored. Relative to the stored str_c, one sorts lower and the
    // other higher, and both differ from it only in the final character.
    const std::string absent_needles[] = {base + "cccc0", base + "ccccx"};

    for (int round = 0; round < copies; ++round) {
        t->create_object().set(col, str_a);
        t->create_object().set(col, str_b);
        t->create_object().set(col, str_c);
    }
    t->create_object().set(col, str_ax);
    t->create_object().set(col, str_ax);
    t->create_object().set(col, str_a0);
    /*
    {
        std::ofstream o("index.dot");
        index->to_dot(o, "");
    }
    */
    if (TEST_TYPE::is_enumerated()) {
        t->enumerate_string_column(col);
    }

    // A stored string: the first match is the first object created, and both
    // count and find_all report every copy.
    auto first_a = t->where().equal(col, StringData(str_a)).find();
    auto count_a = t->count_string(col, StringData(str_a));
    auto all_a = t->where().equal(col, StringData(str_a)).find_all();
    CHECK_EQUAL(first_a, ObjKey(0));
    CHECK_EQUAL(count_a, copies);
    CHECK_EQUAL(all_a.size(), copies);

    // Absent strings must be reported as missing by find, find_all and count.
    for (const std::string& needle : absent_needles) {
        auto first_hit = t->where().equal(col, StringData(needle)).find();
        CHECK_EQUAL(first_hit, null_key);

        auto all_hits = t->where().equal(col, StringData(needle)).find_all();
        CHECK_EQUAL(all_hits.size(), 0);

        auto hit_count = t->count_string(col, StringData(needle));
        CHECK_EQUAL(hit_count, 0);
    }
}
8✔
1408

1409

1410
TEST(StringIndex_Fuzzy)
{
    constexpr size_t chunkcount = 50;
    constexpr size_t rowcount = 100 + 1000 * TEST_DURATION;

    for (size_t main_rounds = 0; main_rounds < 2 + 10 * TEST_DURATION; main_rounds++) {

        Group g;

        // "first" is indexed and "second" is not; both always hold the same
        // string, so equality queries on them must return the same rows.
        auto t = g.add_table("StringsOnly");
        auto col0 = t->add_column(type_String, "first");
        auto col1 = t->add_column(type_String, "second");

        t->add_search_index(col0);

        // Pool of random chunks from which all test strings are assembled.
        std::string strings[chunkcount];
        for (std::string& chunk : strings) {
            size_t len = fastrand() % REALM_MAX_BPNODE_SIZE;

            for (size_t i = 0; i < len; i++)
                chunk += char(fastrand());
        }

        // Builds a string out of 0..3 random chunks. Strings consisting of 2
        // concatenated strings are very interesting, so that count is
        // favoured.
        const auto random_concatenation = [&]() {
            size_t chunks;
            if (fastrand() % 2 == 0)
                chunks = fastrand() % 4;
            else
                chunks = 2;

            std::string assembled;
            for (size_t c = 0; c < chunks; c++) {
                assembled += strings[fastrand() % chunkcount];
            }
            return assembled;
        };

        // Both views must contain the same keys in the same order.
        const auto expect_same_rows = [&](TableView& indexed, TableView& scanned) {
            CHECK_EQUAL(indexed.size(), scanned.size());

            for (size_t v = 0; v < indexed.size(); v++) {
                CHECK_EQUAL(indexed.get_key(v), scanned.get_key(v));
            }
        };

        for (size_t rows = 0; rows < rowcount; rows++) {
            std::string str = random_concatenation();
            t->create_object().set_all(str, str);
        }

        for (size_t rounds = 0; rounds < 1 + 10 * TEST_DURATION; rounds++) {
            // Query for every value that is currently stored.
            for (auto obj : *t) {

                TableView tv0 = (t->column<String>(col0) == obj.get<String>(col0)).find_all();
                TableView tv1 = (t->column<String>(col1) == obj.get<String>(col1)).find_all();

                expect_same_rows(tv0, tv1);
            }


            // Query for freshly generated strings that may or may not be stored.
            for (size_t r = 0; r < 5 + 1000 * TEST_DURATION; r++) {
                std::string str = random_concatenation();

                TableView tv0 = (t->column<String>(col0) == str).find_all();
                TableView tv1 = (t->column<String>(col1) == str).find_all();

                expect_same_rows(tv0, tv1);
            }

            // Mutate the table a little before the next round: drop one
            // object, then copy one object's string into another object.
            if (t->size() > 10)
                t->get_object(0).remove();

            size_t target_row = fastrand() % t->size();
            size_t source_row = fastrand() % t->size();

            std::string str = t->get_object(source_row).get<String>(col0);
            Obj obj = t->get_object(target_row);
            obj.set<String>(col0, StringData(str));
            obj.set<String>(col1, StringData(str));
        }
    }
}
2✔
1500

1501
namespace {
1502

1503
// results returned by the index should be in ascending row order
1504
// this requirement is assumed by the query system which runs find_gte
1505
// and this will return wrong results unless the results are ordered
1506
void check_result_order(const std::vector<ObjKey>& results, TestContext& test_context)
1507
{
88✔
1508
    const size_t num_results = results.size();
88✔
1509
    for (size_t i = 1; i < num_results; ++i) {
288✔
1510
        CHECK(results[i - 1] < results[i]);
200✔
1511
    }
200✔
1512
}
88✔
1513

1514
} // end anonymous namespace
1515

1516

1517
TEST_TYPES(StringIndex_Insensitive, string_column, nullable_string_column, enum_column, nullable_enum_column)
1518
{
8✔
1519
    TEST_TYPE test_resources;
8✔
1520
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1521

1522
    const char* strings[] = {"john",
8✔
1523
                             "John",
8✔
1524
                             "jOhn",
8✔
1525
                             "JOhn",
8✔
1526
                             "joHn",
8✔
1527
                             "JoHn",
8✔
1528
                             "jOHn",
8✔
1529
                             "JOHn",
8✔
1530
                             "johN",
8✔
1531
                             "JohN",
8✔
1532
                             "jOhN",
8✔
1533
                             "JOhN",
8✔
1534
                             "joHN",
8✔
1535
                             "JoHN",
8✔
1536
                             "jOHN",
8✔
1537
                             "JOHN",
8✔
1538
                             "john" /* yes, an extra to test the "bucket" case as well */,
8✔
1539
                             "hans",
8✔
1540
                             "Hansapark",
8✔
1541
                             "george",
8✔
1542
                             "billion dollar startup",
8✔
1543
                             "abcde",
8✔
1544
                             "abcdE",
8✔
1545
                             "Abcde",
8✔
1546
                             "AbcdE",
8✔
1547
                             "common",
8✔
1548
                             "common"};
8✔
1549

1550
    for (const char* string : strings) {
216✔
1551
        col.add(string);
216✔
1552
    }
216✔
1553

1554
    // Generate 255 strings with 1..255 'a' chars
1555
    for (int i = 1; i < 256; ++i) {
2,048✔
1556
        col.add(std::string(i, 'a').c_str());
2,040✔
1557
    }
2,040✔
1558

1559
    // Create a new index on column
1560
    const SearchIndex& ndx = *col.create_search_index();
8✔
1561

1562
    std::vector<ObjKey> results;
8✔
1563
    {
8✔
1564
        // case sensitive
1565
        ndx.find_all(results, strings[0]);
8✔
1566
        CHECK_EQUAL(2, results.size());
8✔
1567
        CHECK_EQUAL(col.get(results[0]), strings[0]);
8✔
1568
        CHECK_EQUAL(col.get(results[1]), strings[0]);
8✔
1569
        check_result_order(results, test_context);
8✔
1570
        results.clear();
8✔
1571
    }
8✔
1572

1573
    {
8✔
1574
        constexpr bool case_insensitive = true;
8✔
1575
        const char* needle = "john";
8✔
1576
        auto upper_needle = case_map(needle, true);
8✔
1577
        ndx.find_all(results, needle, case_insensitive);
8✔
1578
        CHECK_EQUAL(17, results.size());
8✔
1579
        for (size_t i = 0; i < results.size(); ++i) {
144✔
1580
            auto upper_result = case_map(col.get(results[i]), true);
136✔
1581
            CHECK_EQUAL(upper_result, upper_needle);
136✔
1582
        }
136✔
1583
        check_result_order(results, test_context);
8✔
1584
        results.clear();
8✔
1585
    }
8✔
1586

1587

1588
    {
8✔
1589
        struct TestData {
8✔
1590
            const bool case_insensitive;
8✔
1591
            const char* const needle;
8✔
1592
            const size_t result_size;
8✔
1593
        };
8✔
1594

1595
        TestData td[] = {
8✔
1596
            {true, "Hans", 1},
8✔
1597
            {true, "Geor", 0},
8✔
1598
            {true, "George", 1},
8✔
1599
            {true, "geoRge", 1},
8✔
1600
            {true, "Billion Dollar Startup", 1},
8✔
1601
            {true, "ABCDE", 4},
8✔
1602
            {true, "commON", 2},
8✔
1603
        };
8✔
1604

1605
        for (const TestData& t : td) {
56✔
1606
            ndx.find_all(results, t.needle, t.case_insensitive);
56✔
1607
            CHECK_EQUAL(t.result_size, results.size());
56✔
1608
            check_result_order(results, test_context);
56✔
1609
            results.clear();
56✔
1610
        }
56✔
1611
    }
8✔
1612

1613
    // Test generated 'a'-strings
1614
    for (int i = 1; i < 256; ++i) {
2,048✔
1615
        const std::string str = std::string(i, 'A');
2,040✔
1616
        ndx.find_all(results, str.c_str(), false);
2,040✔
1617
        CHECK_EQUAL(0, results.size());
2,040✔
1618
        ndx.find_all(results, str.c_str(), true);
2,040✔
1619
        CHECK_EQUAL(1, results.size());
2,040✔
1620
        results.clear();
2,040✔
1621
    }
2,040✔
1622
}
8✔
1623

1624

1625
/* Disabled until we have better support for case mapping unicode characters
1626

1627
TEST_TYPES(StringIndex_Insensitive_Unicode, non_nullable, nullable)
1628
{
1629
    constexpr bool nullable = TEST_TYPE::value;
1630

1631
    // Create a column with string values
1632
    ref_type ref = StringColumn::create(Allocator::get_default());
1633
    StringColumn col(Allocator::get_default(), ref, nullable);
1634

1635
    const char* strings[] = {
1636
        "æøå", "ÆØÅ",
1637
    };
1638

1639
    for (const char* string : strings) {
1640
        col.add(string);
1641
    }
1642

1643
    // Create a new index on column
1644
    const SearchIndex& ndx = *col.create_search_index();
1645

1646
    ref_type results_ref = IntegerColumn::create(Allocator::get_default());
1647
    IntegerColumn results(Allocator::get_default(), results_ref);
1648

1649
    {
1650
        struct TestData {
1651
            const bool case_insensitive;
1652
            const char* const needle;
1653
            const size_t result_size;
1654
        };
1655

1656
        TestData td[] = {
1657
            {false, "æøå", 1},
1658
            {false, "ÆØÅ", 1},
1659
            {true, "æøå", 2},
1660
            {true, "Æøå", 2},
1661
            {true, "æØå", 2},
1662
            {true, "ÆØå", 2},
1663
            {true, "æøÅ", 2},
1664
            {true, "ÆøÅ", 2},
1665
            {true, "æØÅ", 2},
1666
            {true, "ÆØÅ", 2},
1667
        };
1668

1669
        for (const TestData& t : td) {
1670
            ndx.find_all(results, t.needle, t.case_insensitive);
1671
            CHECK_EQUAL(t.result_size, results.size());
1672
            results.clear();
1673
        }
1674
    }
1675

1676
    // Clean up
1677
    results.destroy();
1678
    col.destroy();
1679
}
1680

1681
*/
1682

1683

1684
TEST_TYPES(StringIndex_45, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
    const SearchIndex& ndx = *col.create_search_index();

    // Two rows holding four lower-case 'a' characters each.
    std::string stored_value(4, 'a');
    col.add(stored_value);
    col.add(stored_value);

    // A needle of five upper-case 'A' characters is one character longer than
    // the stored value, so even a case-insensitive search must find nothing.
    std::string longer_needle(5, 'A');
    std::vector<ObjKey> matches;
    ndx.find_all(matches, longer_needle.c_str(), true);
    CHECK_EQUAL(matches.size(), 0);
}
8✔
1700

1701

1702
namespace {
1703

1704
std::string create_random_a_string(size_t max_len)
1705
{
×
1706
    std::string s;
×
1707
    size_t len = size_t(fastrand(max_len));
×
1708
    for (size_t p = 0; p < len; p++) {
×
1709
        s += fastrand(1) == 0 ? 'a' : 'A';
×
1710
    }
×
1711
    return s;
×
1712
}
×
1713

1714
} // namespace
1715

1716

1717
// Excluded when run with valgrind because it takes a long time
1718
TEST_TYPES_IF(StringIndex_Insensitive_Fuzz, TEST_DURATION > 1, string_column, nullable_string_column, enum_column,
1719
              nullable_enum_column)
1720
{
×
1721
    const size_t max_str_len = 9;
×
1722
    const size_t iters = 3;
×
1723

1724
    for (size_t iter = 0; iter < iters; iter++) {
×
1725
        TEST_TYPE test_resources;
×
1726
        typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
×
1727

1728
        size_t rows = size_t(fastrand(2 * REALM_MAX_BPNODE_SIZE - 1));
×
1729

1730
        // Add 'rows' number of rows in the column
1731
        for (size_t t = 0; t < rows; t++) {
×
1732
            std::string str = create_random_a_string(max_str_len);
×
1733
            col.add(str);
×
1734
        }
×
1735

1736
        const SearchIndex& ndx = *col.create_search_index();
×
1737

1738
        for (size_t t = 0; t < 1000; t++) {
×
1739
            std::string needle = create_random_a_string(max_str_len);
×
1740

1741
            std::vector<ObjKey> res;
×
1742

1743
            ndx.find_all(res, needle.c_str(), true);
×
1744
            check_result_order(res, test_context);
×
1745

1746
            // Check that all items in 'res' point at a match in 'col'
1747
            auto needle_upper = case_map(needle, true);
×
1748
            for (size_t res_ndx = 0; res_ndx < res.size(); res_ndx++) {
×
1749
                auto res_upper = case_map(col.get(res[res_ndx]), true);
×
1750
                CHECK_EQUAL(res_upper, needle_upper);
×
1751
            }
×
1752

1753
            // Check that all matches in 'col' exist in 'res'
1754
            for (size_t col_ndx = 0; col_ndx < col.size(); col_ndx++) {
×
1755
                auto str_upper = case_map(col.get(col_ndx), true);
×
1756
                if (str_upper == needle_upper) {
×
1757
                    CHECK(std::find(res.begin(), res.end(), col.key(col_ndx)) != res.end());
×
1758
                }
×
1759
            }
×
1760
        }
×
1761
    }
×
1762
}
×
1763

1764
// Exercise the StringIndex case insensitive search for strings with very long, common prefixes
1765
// to cover the special case code paths where different strings are stored in a list.
1766
TEST_TYPES(StringIndex_Insensitive_VeryLongStrings, string_column, nullable_string_column, enum_column,
1767
           nullable_enum_column)
1768
{
8✔
1769
    TEST_TYPE test_resources;
8✔
1770
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1771
    const SearchIndex& ndx = *col.create_search_index();
8✔
1772

1773
    std::string long1 = std::string(StringIndex::s_max_offset + 10, 'a');
8✔
1774
    std::string long2 = long1 + "b";
8✔
1775
    std::string long3 = long1 + "c";
8✔
1776

1777
    // Add the strings in a "random" order
1778
    col.add(long1);
8✔
1779
    col.add(long2);
8✔
1780
    col.add(long2);
8✔
1781
    col.add(long1);
8✔
1782
    col.add(long3);
8✔
1783
    col.add(long2);
8✔
1784
    col.add(long1);
8✔
1785
    col.add(long1);
8✔
1786

1787
    std::vector<ObjKey> results;
8✔
1788

1789
    ndx.find_all(results, long1.c_str(), true);
8✔
1790
    CHECK_EQUAL(results.size(), 4);
8✔
1791
    check_result_order(results, test_context);
8✔
1792
    results.clear();
8✔
1793
    ndx.find_all(results, long2.c_str(), true);
8✔
1794
    CHECK_EQUAL(results.size(), 3);
8✔
1795
    results.clear();
8✔
1796
    ndx.find_all(results, long3.c_str(), true);
8✔
1797
    CHECK_EQUAL(results.size(), 1);
8✔
1798
    results.clear();
8✔
1799
}
8✔
1800

1801

1802
// Bug with case insensitive search on numbers that gives duplicate results
1803
TEST_TYPES(StringIndex_Insensitive_Numbers, string_column, nullable_string_column, enum_column, nullable_enum_column)
1804
{
8✔
1805
    TEST_TYPE test_resources;
8✔
1806
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1807
    const SearchIndex& ndx = *col.create_search_index();
8✔
1808

1809
    constexpr const char* number_string_16 = "1111111111111111";
8✔
1810
    constexpr const char* number_string_17 = "11111111111111111";
8✔
1811

1812
    col.add(number_string_16);
8✔
1813
    col.add(number_string_17);
8✔
1814

1815
    std::vector<ObjKey> results;
8✔
1816

1817
    ndx.find_all(results, number_string_16, true);
8✔
1818
    CHECK_EQUAL(results.size(), 1);
8✔
1819
}
8✔
1820

1821

1822
// A lower-case needle must match both differently-cased spellings of "rover"
// in a case-insensitive search, and the result must pass check_result_order().
TEST_TYPES(StringIndex_Rover, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& column = fixture.get_column();
    const SearchIndex& index = *column.create_search_index();

    for (const char* spelling : {"ROVER", "Rover"}) {
        column.add(spelling);
    }

    std::vector<ObjKey> found;
    index.find_all(found, "rover", true);

    CHECK_EQUAL(found.size(), 2);
    check_result_order(found, test_context);
}
8✔
1838

1839
// Equality queries on a string primary-key column of a table holding exactly
// one object: the stored key is found, an absent key is not.
TEST(StringIndex_QuerySingleObject)
{
    Group group;
    auto table = group.add_table_with_primary_key("class_StringClass", type_String, "name", true);
    table->create_object_with_primary_key("Foo");

    auto present = table->where().equal(table->get_column_key("name"), "Foo", true);
    CHECK_EQUAL(present.count(), 1);

    auto absent = table->where().equal(table->get_column_key("name"), "Bar", true);
    CHECK_EQUAL(absent.count(), 0);
}
2✔
1850

1851
// Adding a search index to a Mixed column that already contains a value must
// complete without error; the test has no further assertions.
TEST(StringIndex_MixedNonEmptyTable)
{
    Group group;
    auto table = group.add_table("foo");
    auto any_col = table->add_column(type_Mixed, "any");

    // Populate first, index afterwards.
    auto object = table->create_object();
    object.set(any_col, Mixed("abcdefgh"));

    table->add_search_index(any_col);
}
2✔
1859

1860
// An indexed Mixed column must stop matching a string query once the value is
// replaced by a nested collection, and match again when the string is restored.
TEST(StringIndex_MixedWithNestedCollections)
{
    Group group;
    auto table = group.add_table("foo");
    auto value_col = table->add_column(type_Mixed, "value");
    table->add_search_index(value_col);

    auto apple_obj = table->create_object();
    apple_obj.set(value_col, Mixed("apple"));

    auto banana_obj = table->create_object();
    banana_obj.set(value_col, Mixed("banana"));

    auto banana_query = table->query("value = 'banana'");
    CHECK_EQUAL(banana_query.count(), 1);

    // Turn the matching value into a dictionary: no string left to match.
    banana_obj.set_collection(value_col, CollectionType::Dictionary);
    CHECK_EQUAL(banana_query.count(), 0);

    // Put the string back: the object matches again.
    banana_obj.set(value_col, Mixed("banana"));
    CHECK_EQUAL(banana_query.count(), 1);
}
2✔
1878

1879
// An integer and a string stored in the same indexed Mixed column must be
// kept apart by equality queries. The integer 0x6867666564636261 is made of
// the ASCII codes of 'h' down to 'a', i.e. the characters of "abcdefgh".
TEST(StringIndex_MixedEqualBitPattern)
{
    Group group;
    auto table = group.add_table("foo");
    auto any_col = table->add_column(type_Mixed, "any");
    table->add_search_index(any_col);

    Mixed int_value(int64_t(0x6867666564636261));

    auto add_row = [&](Mixed value) {
        table->create_object().set(any_col, value);
    };

    add_row(Mixed("abcdefgh"));
    // From single value to list
    add_row(int_value);

    auto view = table->where().equal(any_col, int_value).find_all();
    CHECK_EQUAL(view.size(), 1);
    CHECK_EQUAL(view.get_object(0).get_any(any_col), int_value);

    table->clear();
    add_row(Mixed("abcdefgh"));
    add_row(Mixed("abcdefgh"));
    // Insert in existing list
    add_row(int_value);

    view = table->where().equal(any_col, int_value).find_all();
    CHECK_EQUAL(view.size(), 1);
    CHECK_EQUAL(view.get_object(0).get_any(any_col), int_value);
    view = table->where().equal(any_col, Mixed("abcdefgh")).find_all();
    CHECK_EQUAL(view.size(), 2);

    // Add another one into existing list
    add_row(int_value);
    view = table->where().equal(any_col, int_value).find_all();
    CHECK_EQUAL(view.size(), 2);
    CHECK_EQUAL(view.get_object(0).get_any(any_col), int_value);
    CHECK_EQUAL(view.get_object(1).get_any(any_col), int_value);
}
2✔
1914

1915
// Checks case_map() on UTF-8 input:
//  - accented Latin letters convert to lower case and back, while the symbols
//    in the sample ('±', 'ß', '×', '÷') are expected to come back unchanged;
//  - emoji embedded in ASCII text are left untouched by upper-casing;
//  - input cut off in the middle of a multi-byte character yields no result;
//  - a Danish phrase round-trips through upper and lower case.
//
// Each follow-up conversion that dereferences a previous result is performed
// only after CHECK() has confirmed that the result holds a value, so a failed
// conversion is reported as a test failure instead of dereferencing an empty
// result.
TEST(Unicode_Casemap)
{
    std::string inp = "±ÀÁÂÃÄÅÆÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝß×÷";
    auto out = case_map(inp, false);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "±àáâãäåæèéêëìíîïñòóôõöøùúûüýß×÷");

        // Round-trip back to upper case; only meaningful if the first step succeeded.
        out = case_map(*out, true);
        if (CHECK(out)) {
            CHECK_EQUAL(*out, inp);
        }
    }

    inp = "A very old house 🏠 is on 🔥, we have to save the 🦄";
    out = case_map(inp, true);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "A VERY OLD HOUSE 🏠 IS ON 🔥, WE HAVE TO SAVE THE 🦄");
    }

    StringData trailing_garbage(inp.data(), 19); // String terminated inside icon
    out = case_map(trailing_garbage, true);
    CHECK_NOT(out);

    inp = "rødgrød med fløde";
    out = case_map(inp, true);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "RØDGRØD MED FLØDE");

        // Round-trip back to lower case, again guarded by the check above.
        out = case_map(*out, false);
        if (CHECK(out)) {
            CHECK_EQUAL(*out, inp);
        }
    }
}
2✔
1947

1948
// Returns a string of exactly `length` characters, each picked uniformly from
// the 62 ASCII digits and letters. The generator is seeded once per thread
// from std::random_device, so the output differs from run to run.
static std::string random_string(std::string::size_type length)
{
    static constexpr char alphabet[] = "0123456789"
                                       "abcdefghijklmnopqrstuvwxyz"
                                       "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    thread_local static std::mt19937 engine{std::random_device{}()};
    // sizeof counts the terminating NUL, hence "- 2" for the last valid index.
    thread_local static std::uniform_int_distribution<std::string::size_type> pick_index(0, sizeof(alphabet) - 2);

    std::string result;
    result.reserve(length);

    for (std::string::size_type produced = 0; produced < length; ++produced) {
        result.push_back(alphabet[pick_index(engine)]);
    }

    return result;
}
60,000✔
1966

1967
// Fills a list-of-strings column with random 14-character strings, three per
// object, and checks that a query for one remembered string finds exactly one
// object both before and after a search index is added to the column.
TEST(StringIndex_ListOfRandomStrings)
{
    using namespace std::chrono; // only needed when timing code is added around find_all()

    const size_t object_count = 10000;
    const size_t strings_per_object = 3;

    SHARED_GROUP_TEST_PATH(path);
    auto db = DB::create(path);
    auto wt = db->start_write();

    auto table = wt->add_table_with_primary_key("foo", type_Int, "_id");
    ColKey codes_col = table->add_column_list(type_String, "codes");

    // Remember the first string of object 5000 as the value to search for.
    std::string wanted;
    for (size_t obj_ndx = 0; obj_ndx < object_count; ++obj_ndx) {
        auto object = table->create_object_with_primary_key(int64_t(obj_ndx));
        auto codes = object.get_list<String>(codes_col);
        for (size_t slot = 0; slot < strings_per_object; ++slot) {
            std::string code(random_string(14));
            const bool is_probe = (obj_ndx == 5000 && slot == 0);
            if (is_probe) {
                wanted = code;
            }
            codes.add(StringData(code));
        }
    }

    std::vector<Mixed> arguments{Mixed(wanted)};
    auto query = wt->get_table("foo")->query("codes = $0", arguments);

    // Lookup without an index.
    auto view = query.find_all();
    CHECK_EQUAL(view.size(), 1);
    table->add_search_index(codes_col);

    // Same lookup with the index in place.
    view = query.find_all();
    CHECK_EQUAL(view.size(), 1);
    table->add_search_index(codes_col);
}
2✔
2009

2010
// Queries on a nullable list-of-strings column must give the same counts with
// (std::true_type) and without (std::false_type) a search index while list
// elements are added, replaced, removed and cleared and an object is deleted.
// With an index, adding a string longer than StringIndex::s_max_offset is
// expected to throw.
TEST_TYPES(StringIndex_ListOfStrings, std::true_type, std::false_type)
{
    constexpr bool add_index = TEST_TYPE::value;
    Group group;

    auto table = group.add_table("foo");
    ColKey names_col = table->add_column_list(type_String, "names", true);
    if constexpr (add_index) {
        table->add_search_index(names_col);
    }

    // Number of objects matched by the given query string.
    auto matches = [&](const char* predicate) {
        return table->query(predicate).count();
    };

    auto obj1 = table->create_object();
    auto obj2 = table->create_object();
    auto obj3 = table->create_object();

    for (Obj* other : {&obj2, &obj3}) {
        auto other_names = other->get_list<String>(names_col);
        other_names.add("Johnny");
        other_names.add("John");
    }

    // obj1: Johnny, John, Ivan, Ivan, null
    auto names = obj1.get_list<String>(names_col);
    names.add("Johnny");
    names.add("John");
    names.add("Ivan");
    names.add("Ivan");
    names.add(StringData());

    CHECK_EQUAL(matches(R"(names = "John")"), 3);
    CHECK_EQUAL(matches(R"(names = "Johnny")"), 3);
    CHECK_EQUAL(matches(R"(names = NULL)"), 1);

    // Replace obj1's "Johnny" with "Paul".
    names.set(0, "Paul");
    CHECK_EQUAL(matches(R"(names = "John")"), 3);
    CHECK_EQUAL(matches(R"(names = "Johnny")"), 2);
    CHECK_EQUAL(matches(R"(names = "Paul")"), 1);

    // Drop obj1's "John"; the duplicated "Ivan" still counts obj1 once.
    names.remove(1);
    CHECK_EQUAL(matches(R"(names = "John")"), 2);
    CHECK_EQUAL(matches(R"(names = "Johnny")"), 2);
    CHECK_EQUAL(matches(R"(names = "Paul")"), 1);
    CHECK_EQUAL(matches(R"(names = "Ivan")"), 1);

    names.clear();
    CHECK_EQUAL(matches(R"(names = "John")"), 2);
    CHECK_EQUAL(matches(R"(names = "Johnny")"), 2);
    CHECK_EQUAL(matches(R"(names = "Paul")"), 0);

    // Removing a whole object must remove its list entries from the results.
    names = obj2.get_list<String>(names_col);
    names.insert(0, "Adam");
    names.insert(0, "Adam");
    obj2.remove();
    CHECK_EQUAL(matches(R"(names = "John")"), 1);
    CHECK_EQUAL(matches(R"(names = "Johnny")"), 1);

    const std::string at_limit(StringIndex::s_max_offset, 'a');
    const std::string over_limit = at_limit + "b";

    names = obj1.get_list<String>(names_col);
    names.add(at_limit);
    if (add_index) {
        CHECK_THROW_ANY(names.add(over_limit));
    }
}
4✔
2074

2075
#endif // TEST_INDEX_STRING
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc