• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / github_pull_request_319175

19 May 2025 08:16PM UTC coverage: 91.119% (-0.02%) from 91.143%
github_pull_request_319175

Pull #8082

Evergreen

web-flow
Bump setuptools from 70.0.0 to 78.1.1 in /evergreen/hang_analyzer

Bumps [setuptools](https://github.com/pypa/setuptools) from 70.0.0 to 78.1.1.
- [Release notes](https://github.com/pypa/setuptools/releases)
- [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst)
- [Commits](https://github.com/pypa/setuptools/compare/v70.0.0...v78.1.1)

---
updated-dependencies:
- dependency-name: setuptools
  dependency-version: 78.1.1
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Pull Request #8082: Bump setuptools from 70.0.0 to 78.1.1 in /evergreen/hang_analyzer

102788 of 181548 branches covered (56.62%)

217441 of 238634 relevant lines covered (91.12%)

5497200.53 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.48
/test/test_index_string.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include "testsettings.hpp"
20
#ifdef TEST_INDEX_STRING
21

22
#include <realm.hpp>
23
#include <realm/index_string.hpp>
24
#include <realm/query_expression.hpp>
25
#include <realm/tokenizer.hpp>
26
#include <realm/util/to_string.hpp>
27
#include <set>
28
#include "test.hpp"
29
#include "util/misc.hpp"
30
#include "util/random.hpp"
31

32
using namespace realm;
33
using namespace util;
34
using namespace realm;
35
using namespace realm::util;
36
using namespace realm::test_util;
37
using unit_test::TestContext;
38

39
// Test independence and thread-safety
40
// -----------------------------------
41
//
42
// All tests must be thread safe and independent of each other. This
43
// is required because it allows for both shuffling of the execution
44
// order and for parallelized testing.
45
//
46
// In particular, avoid using std::rand() since it is not guaranteed
47
// to be thread safe. Instead use the API offered in
48
// `test/util/random.hpp`.
49
//
50
// All files created in tests must use the TEST_PATH macro (or one of
51
// its friends) to obtain a suitable file system path. See
52
// `test/util/test_path.hpp`.
53
//
54
//
55
// Debugging and the ONLY() macro
56
// ------------------------------
57
//
58
// A simple way of disabling all tests except one called `Foo`, is to
59
// replace TEST(Foo) with ONLY(Foo) and then recompile and rerun the
60
// test suite. Note that you can also use filtering by setting the
61
// environment variable `UNITTEST_FILTER`. See `README.md` for more on
62
// this.
63
//
64
// Another way to debug a particular test, is to copy that test into
65
// `experiments/testcase.cpp` and then run `sh build.sh
66
// check-testcase` (or one of its friends) from the command line.
67

68

69
namespace {
70

71
template <typename T>
72
class column {
73
public:
74
    class ColumnTestType {
75
    public:
76
        ColumnTestType(column* owner)
77
            : m_owner(owner)
88✔
78
        {
176✔
79
        }
176✔
80
        const SearchIndex* create_search_index()
81
        {
178✔
82
            m_owner->m_table.add_search_index(m_owner->m_col_key);
178✔
83
            return m_owner->m_table.get_search_index(m_owner->m_col_key);
178✔
84
        }
178✔
85
        ObjKey key(size_t ndx) const
86
        {
176✔
87
            return m_keys[ndx];
176✔
88
        }
176✔
89
        size_t size() const
90
        {
294✔
91
            return m_keys.size();
294✔
92
        }
294✔
93
        void add(T value)
94
        {
79,638✔
95
            auto k = m_owner->m_table.create_object().set(m_owner->m_col_key, value).get_key();
79,638✔
96
            m_keys.push_back(k);
79,638✔
97
        }
79,638✔
98
        void add_null()
99
        {
2✔
100
            auto k = m_owner->m_table.create_object().set_null(m_owner->m_col_key).get_key();
2✔
101
            m_keys.push_back(k);
2✔
102
        }
2✔
103
        void set(size_t ndx, T value)
104
        {
44✔
105
            m_owner->m_table.get_object(m_keys[ndx]).set(m_owner->m_col_key, value);
44✔
106
        }
44✔
107
        void insert(size_t ndx, T value)
108
        {
10✔
109
            auto k = m_owner->m_table.create_object().set(m_owner->m_col_key, value).get_key();
10✔
110
            m_keys.insert(m_keys.begin() + ndx, k);
10✔
111
        }
10✔
112
        T get(size_t ndx)
113
        {
2,165,978✔
114
            return m_owner->m_table.get_object(m_keys[ndx]).template get<T>(m_owner->m_col_key);
2,165,978✔
115
        }
2,165,978✔
116
        T get(ObjKey obj_key)
117
        {
152✔
118
            return m_owner->m_table.get_object(obj_key).template get<T>(m_owner->m_col_key);
152✔
119
        }
152✔
120
        void erase(size_t ndx)
121
        {
186✔
122
            m_owner->m_table.remove_object(m_keys[ndx]);
186✔
123
            m_keys.erase(m_keys.begin() + ndx);
186✔
124
        }
186✔
125
        void clear()
126
        {
162✔
127
            m_owner->m_table.clear();
162✔
128
            m_keys.clear();
162✔
129
        }
162✔
130
        size_t find_first(T value) const
131
        {
2,824✔
132
            auto k = m_owner->m_table.find_first(m_owner->m_col_key, value);
2,824✔
133
            if (k == realm::null_key) {
2,824✔
134
                return realm::npos;
1,276✔
135
            }
1,276✔
136
            auto it = std::find(m_keys.begin(), m_keys.end(), k);
1,548✔
137
            return it - m_keys.begin();
1,548✔
138
        }
2,824✔
139
        size_t count(T value) const
140
        {
160✔
141
            return m_owner->m_table.count_string(m_owner->m_col_key, value);
160✔
142
        }
160✔
143
        void verify()
144
        {
24✔
145
            m_owner->m_table.verify();
24✔
146
        }
24✔
147

148
    private:
149
        column* m_owner;
150
        std::vector<ObjKey> m_keys;
151
    };
152

153
    column(bool nullable = false, bool enumerated = false)
154
        : m_column(this)
88✔
155
    {
176✔
156
        m_col_key = m_table.add_column(ColumnTypeTraits<T>::id, "values", nullable);
176✔
157
        if (enumerated) {
176✔
158
            m_table.enumerate_string_column(m_col_key);
78✔
159
        }
78✔
160
    }
176✔
161
    ColumnTestType& get_column()
162
    {
176✔
163
        return m_column;
176✔
164
    }
176✔
165

166
private:
167
    Table m_table;
168
    ColKey m_col_key;
169
    ColumnTestType m_column;
170
};
171

172
// Fixture type for TEST_TYPES: a string column that is neither nullable nor
// enumerated.
class string_column : public column<String> {
public:
    string_column()
        : column<String>(/* nullable */ false, /* enumerated */ false)
    {
    }

    // Flags describing the column this fixture builds.
    static bool is_nullable() { return false; }
    static bool is_enumerated() { return false; }
};
187
// Fixture type for TEST_TYPES: a nullable string column that is not
// enumerated.
class nullable_string_column : public column<String> {
public:
    nullable_string_column()
        : column<String>(/* nullable */ true, /* enumerated */ false)
    {
    }

    // Flags describing the column this fixture builds.
    static bool is_nullable() { return true; }
    static bool is_enumerated() { return false; }
};
202
// Fixture type for TEST_TYPES: a non-nullable string column that has been
// enumerated.
class enum_column : public column<String> {
public:
    enum_column()
        : column<String>(/* nullable */ false, /* enumerated */ true)
    {
    }

    // Flags describing the column this fixture builds.
    static bool is_nullable() { return false; }
    static bool is_enumerated() { return true; }
};
217
// Fixture type for TEST_TYPES: a nullable string column that has been
// enumerated.
class nullable_enum_column : public column<String> {
public:
    nullable_enum_column()
        : column<String>(/* nullable */ true, /* enumerated */ true)
    {
    }

    // Flags describing the column this fixture builds.
    static bool is_nullable() { return true; }
    static bool is_enumerated() { return true; }
};
232

233
// disable to avoid warnings about not being used - enable when tests
234
// that need them are enabled again
235

236
// Strings used by tests. s5 and s6 share the prefix "John" with s1, and s7
// is a prefix of s3.
const char s1[] = "John";
const char s2[] = "Brian";
const char s3[] = "Samantha";
const char s4[] = "Tom";
const char s5[] = "Johnathan";
const char s6[] = "Johnny";
const char s7[] = "Sam";

// Integers used by the integer index tests; 0x11112227 appears twice on
// purpose so that duplicate handling is exercised. Declared const because the
// tests in this file must be thread safe and independent of each other, so
// shared data must never be modified.
const std::vector<int64_t> ints = {0x1111,     0x11112222, 0x11113333, 0x1111333, 0x111122223333ull,
                                   0x1111222233334ull, 0x22223333, 0x11112227, 0x11112227, 0x78923};

// Tag types for selecting nullable / non-nullable variants.
using nullable = std::true_type;
using non_nullable = std::false_type;
251

252
} // anonymous namespace
253

254
// Exercises the tokenizer on plain English, on text containing a multi-byte
// character, and on hyphenated input.
TEST(Tokenizer_Basic)
{
    auto tokenizer = realm::Tokenizer::get_instance();

    // Six words, but only four distinct tokens.
    tokenizer->reset("to be or not to be");
    auto distinct_tokens = tokenizer->get_all_tokens();
    CHECK_EQUAL(distinct_tokens.size(), 4);

    // Same sentence with a capitalised first word: both occurrences are
    // expected under the key "to".
    tokenizer->reset("To be or not to be");
    realm::TokenInfoMap english = tokenizer->get_token_info();
    CHECK_EQUAL(english.size(), 4);
    realm::TokenInfo& to_info = english["to"];
    // Word positions of the two occurrences ...
    CHECK_EQUAL(to_info.positions.size(), 2);
    CHECK_EQUAL(to_info.positions[0], 0);
    CHECK_EQUAL(to_info.positions[1], 4);
    // ... and their character ranges within the input.
    CHECK_EQUAL(to_info.ranges.size(), 2);
    CHECK_EQUAL(to_info.ranges[0].first, 0);
    CHECK_EQUAL(to_info.ranges[0].second, 2);
    CHECK_EQUAL(to_info.ranges[1].first, 13);
    CHECK_EQUAL(to_info.ranges[1].second, 15);

    // Seven distinct words; the range of "sø" spans three units (17..20),
    // i.e. presumably bytes rather than characters.
    tokenizer->reset("Jeg gik mig over sø og land");
    realm::TokenInfoMap danish = tokenizer->get_token_info();
    CHECK_EQUAL(danish.size(), 7);
    realm::TokenInfo& soe_info = danish["sø"];
    CHECK_EQUAL(soe_info.ranges[0].first, 17);
    CHECK_EQUAL(soe_info.ranges[0].second, 20);

    // Hyphens, leading or embedded, separate tokens and are not part of them.
    tokenizer->reset("with-hyphen -term -other-term-plus");
    const std::set<std::string> expected_tokens({"with", "hyphen", "term", "other", "plus"});
    CHECK(tokenizer->get_all_tokens() == expected_tokens);
}
285

286
// Adding a search index to a column of a non-indexable type must be rejected.
TEST(StringIndex_NonIndexable)
{
    Group group;
    TableRef table = group.add_table("table");
    TableRef link_target = group.add_table("target");

    // One column per type under test: link, link list, double, float, binary.
    table->add_column(*link_target, "link");
    table->add_column_list(*link_target, "linkList");
    table->add_column(type_Double, "double");
    table->add_column(type_Float, "float");
    table->add_column(type_Binary, "binary");

    // Every one of them must refuse the index with IllegalOperation.
    for (auto col_key : table->get_column_keys()) {
        CHECK_LOGIC_ERROR(table->add_search_index(col_key), ErrorCodes::IllegalOperation);
    }
}
302

303
// Builds an index over an already populated column and looks every value up.
TEST_TYPES(StringIndex_BuildIndex, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // Positions 0..6: position 4 repeats s1, positions 5 and 6 share a prefix with it.
    for (const char* name : {s1, s2, s3, s4, s1, s5, s6})
        col.add(name);

    // Create a new index on column
    const SearchIndex* index = col.create_search_index();

    // Each lookup must yield the first object holding the value. The expected
    // numbers are raw ObjKey values, which this test takes to equal the
    // insertion positions.
    CHECK_EQUAL(0, index->find_first(s1).value);
    CHECK_EQUAL(1, index->find_first(s2).value);
    CHECK_EQUAL(2, index->find_first(s3).value);
    CHECK_EQUAL(3, index->find_first(s4).value);
    CHECK_EQUAL(5, index->find_first(s5).value);
    CHECK_EQUAL(6, index->find_first(s6).value);
}
333

334
// Empties an indexed column twice, once back-to-front and once front-to-back,
// and expects the index to be empty after each pass.
TEST_TYPES(StringIndex_DeleteAll, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // Seven values: one duplicate (s1) and two sharing a prefix with it (s5, s6).
    for (const char* name : {s1, s2, s3, s4, s1, s5, s6})
        col.add(name);

    // Create a new index on column
    const SearchIndex* index = col.create_search_index();

    // First pass: remove positions 6 down to 0 (reverse order to avoid ref updates).
    for (size_t remaining = 7; remaining > 0; --remaining)
        col.erase(remaining - 1);
    CHECK(index->is_empty());

    // Refill with the same seven values.
    for (const char* name : {s1, s2, s3, s4, s1, s5, s6})
        col.add(name);

    // Second pass: always remove the front element (forces constant ref updating).
    for (int removed = 0; removed < 7; ++removed)
        col.erase(0);
    CHECK(index->is_empty());
}
381

382
// Removes single rows from an indexed column and checks after each removal
// that lookups reflect the remaining rows.
TEST_TYPES(StringIndex_Delete, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // Positions: 0 = s1, 1 = s2, 2 = s3, 3 = s4, 4 = s1 (duplicate).
    for (const char* name : {s1, s2, s3, s4, s1})
        col.add(name);

    // Create a new index on column
    const SearchIndex* index = col.create_search_index();

    // Remove s2 ("Brian"), the first item in index order.
    col.erase(1);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(1, col.find_first(s3));
    CHECK_EQUAL(2, col.find_first(s4));
    CHECK_EQUAL(null_key, index->find_first(s2));

    // Remove s4 ("Tom"), the last item in index order.
    col.erase(2);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(1, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s4));
    CHECK_EQUAL(not_found, col.find_first(s2));

    // Remove s3 ("Samantha"), the middle item in index order.
    col.erase(1);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(not_found, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s4));
    CHECK_EQUAL(not_found, col.find_first(s2));

    // Only the two s1 rows are left; removing both must empty the index.
    for (int removed = 0; removed < 2; ++removed)
        col.erase(0);
    CHECK(index->is_empty());
}
425

426

427
// Clearing a column that never held any rows must leave its index empty.
TEST_TYPES(StringIndex_ClearEmpty, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // Index the still-empty column.
    const SearchIndex* index = col.create_search_index();

    // Clear to remove all entries
    col.clear();
    CHECK(index->is_empty());
}
439

440
// Clears a populated, indexed column, refills it, and checks that the index
// finds the refilled rows.
TEST_TYPES(StringIndex_Clear, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // Seven values: one duplicate (s1) and two sharing a prefix with it (s5, s6).
    for (const char* name : {s1, s2, s3, s4, s1, s5, s6})
        col.add(name);

    // Create a new index on column
    const SearchIndex* index = col.create_search_index();

    // Clear to remove all entries
    col.clear();
    CHECK(index->is_empty());

    // Re-insert the same values in the same order.
    for (const char* name : {s1, s2, s3, s4, s1, s5, s6})
        col.add(name);

    // Each lookup must resolve to the first position holding that value.
    CHECK_EQUAL(col.key(0), index->find_first(s1));
    CHECK_EQUAL(col.key(1), index->find_first(s2));
    CHECK_EQUAL(col.key(2), index->find_first(s3));
    CHECK_EQUAL(col.key(3), index->find_first(s4));
    CHECK_EQUAL(col.key(5), index->find_first(s5));
    CHECK_EQUAL(col.key(6), index->find_first(s6));
}
483

484

485
// Overwrites values at the top, bottom and middle of an indexed column and
// checks that lookups follow each change.
TEST_TYPES(StringIndex_Set, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // Positions: 0 = s1, 1 = s2, 2 = s3, 3 = s4, 4 = s1 (duplicate).
    for (const char* name : {s1, s2, s3, s4, s1})
        col.add(name);

    // Create a new index on column
    col.create_search_index();

    // Top: position 0 becomes s5, so the first s1 is now the one at position 4.
    col.set(0, s5);

    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s3));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s1));

    // Bottom: position 4 becomes s6, which removes the last s1.
    col.set(4, s6);

    CHECK_EQUAL(not_found, col.find_first(s1));
    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s3));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s6));

    // Middle: position 2 changes from s3 to s7, a prefix of the old value.
    col.set(2, s7);

    CHECK_EQUAL(not_found, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s1));
    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s7));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s6));
}
529

530
// Counts occurrences of values that appear zero to four times in an indexed column.
TEST_TYPES(StringIndex_Count, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // s1 once, s2 twice, s3 three times, s4 four times, in that order.
    const char* const names[] = {s1, s2, s3, s4};
    for (size_t which = 0; which < 4; ++which) {
        for (size_t copies = 0; copies <= which; ++copies)
            col.add(names[which]);
    }

    // Create a new index on column
    col.create_search_index();

    // s5 was never added, so its count is zero.
    CHECK_EQUAL(0, col.count(s5));
    CHECK_EQUAL(1, col.count(s1));
    CHECK_EQUAL(2, col.count(s2));
    CHECK_EQUAL(3, col.count(s3));
    CHECK_EQUAL(4, col.count(s4));
}
561

562
// An index over a column with repeated values must report that duplicates exist.
TEST_TYPES(StringIndex_Distinct, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // s1 once, s2 twice, s3 three times, s4 four times, in that order.
    const char* const names[] = {s1, s2, s3, s4};
    for (size_t which = 0; which < 4; ++which) {
        for (size_t copies = 0; copies <= which; ++copies)
            col.add(names[which]);
    }

    // Create a new index on column
    const SearchIndex& index = *col.create_search_index();
    CHECK(index.has_duplicate_values());
}
582

583
// Checks the three outcomes of find_all_no_copy(): no match, a single match,
// and several matches delivered as a reference to a list of keys.
TEST_TYPES(StringIndex_FindAllNoCopy, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();

    // Positions: 0 = s1, 1-2 = s2, 3-5 = s3, 6-9 = s4.
    const char* const names[] = {s1, s2, s3, s4};
    for (size_t which = 0; which < 4; ++which) {
        for (size_t copies = 0; copies <= which; ++copies)
            col.add(names[which]);
    }

    // Create a new index on column
    const SearchIndex* index = col.create_search_index();

    InternalFindResult found;

    // A value that was never added.
    FindRes missing = index->find_all_no_copy(StringData("not there"), found);
    CHECK_EQUAL(FindRes_not_found, missing);

    // s1 occurs once: the payload is compared against 0, the first row.
    FindRes single = index->find_all_no_copy(s1, found);
    CHECK_EQUAL(FindRes_single, single);
    CHECK_EQUAL(0, found.payload);

    // s4 occurs four times: the payload is a ref to a list of object keys.
    FindRes several = index->find_all_no_copy(s4, found);
    CHECK_EQUAL(FindRes_column, several);
    BPlusTree<ObjKey> matches(Allocator::get_default());
    matches.init_from_ref(ref_type(found.payload));

    CHECK_EQUAL(4, found.end_ndx - found.start_ndx);
    CHECK_EQUAL(4, matches.size());
    CHECK_EQUAL(col.key(6), matches.get(0));
    CHECK_EQUAL(col.key(7), matches.get(1));
    CHECK_EQUAL(col.key(8), matches.get(2));
    CHECK_EQUAL(col.key(9), matches.get(3));
}
622

623
// If a column contains a specific value in multiple rows, then the index will store a list of these row numbers
624
// in form of a column. If you call find_all() on an index, it will return a *reference* to that column instead
625
// of copying it to you, as a performance optimization.
626
TEST(StringIndex_FindAllNoCopy2_Int)
627
{
2✔
628
    // Create a column with duplcate values
629
    column<Int> test_resources;
2✔
630
    auto col = test_resources.get_column();
2✔
631

632
    for (auto i : ints)
2✔
633
        col.add(i);
20✔
634

635
    // Create a new index on column
636
    col.create_search_index();
2✔
637
    const SearchIndex& ndx = *col.create_search_index();
2✔
638
    InternalFindResult results;
2✔
639

640
    for (auto i : ints) {
20✔
641
        FindRes res = ndx.find_all_no_copy(i, results);
20✔
642

643
        size_t real = 0;
20✔
644
        for (auto j : ints) {
200✔
645
            if (i == j)
200✔
646
                real++;
24✔
647
        }
200✔
648

649
        if (real == 1) {
20✔
650
            CHECK_EQUAL(res, FindRes_single);
16✔
651
            CHECK_EQUAL(i, ints[size_t(results.payload)]);
16✔
652
        }
16✔
653
        else if (real > 1) {
4✔
654
            CHECK_EQUAL(FindRes_column, res);
4✔
655
            const IntegerColumn results_column(Allocator::get_default(), ref_type(results.payload));
4✔
656
            CHECK_EQUAL(real, results.end_ndx - results.start_ndx);
4✔
657
            CHECK_EQUAL(real, results_column.size());
4✔
658
            for (size_t y = 0; y < real; y++)
12✔
659
                CHECK_EQUAL(i, ints[size_t(results_column.get(y))]);
8✔
660
        }
4✔
661
    }
20✔
662
}
2✔
663

664
// If a column contains a specific value in multiple rows, then the index will store a list of these row numbers
665
// in form of a column. If you call find_all() on an index, it will return a *reference* to that column instead
666
// of copying it to you, as a performance optimization.
667
TEST(StringIndex_FindAllNoCopy2_IntNull)
668
{
2✔
669
    // Create a column with duplcate values
670
    column<Int> test_resources(true);
2✔
671
    auto col = test_resources.get_column();
2✔
672

673
    for (size_t t = 0; t < sizeof(ints) / sizeof(ints[0]); t++)
8✔
674
        col.add(ints[t]);
6✔
675
    col.add_null();
2✔
676

677
    // Create a new index on column
678
    const SearchIndex& ndx = *col.create_search_index();
2✔
679
    InternalFindResult results;
2✔
680

681
    for (size_t t = 0; t < sizeof(ints) / sizeof(ints[0]); t++) {
8✔
682
        FindRes res = ndx.find_all_no_copy(ints[t], results);
6✔
683

684
        size_t real = 0;
6✔
685
        for (size_t y = 0; y < sizeof(ints) / sizeof(ints[0]); y++) {
24✔
686
            if (ints[t] == ints[y])
18✔
687
                real++;
6✔
688
        }
18✔
689

690
        if (real == 1) {
6✔
691
            CHECK_EQUAL(res, FindRes_single);
6✔
692
            CHECK_EQUAL(ints[t], ints[size_t(results.payload)]);
6✔
693
        }
6✔
694
        else if (real > 1) {
×
695
            CHECK_EQUAL(FindRes_column, res);
×
696
            const IntegerColumn results2(Allocator::get_default(), ref_type(results.payload));
×
697
            CHECK_EQUAL(real, results.end_ndx - results.start_ndx);
×
698
            CHECK_EQUAL(real, results2.size());
×
699
            for (size_t y = 0; y < real; y++)
×
700
                CHECK_EQUAL(ints[t], ints[size_t(results2.get(y))]);
×
701
        }
×
702
    }
6✔
703

704
    FindRes res = ndx.find_all_no_copy(null{}, results);
2✔
705
    CHECK_EQUAL(FindRes_single, res);
2✔
706
    CHECK_EQUAL(results.payload, col.size() - 1);
2✔
707
}
2✔
708

709
// Looks up strings that share a long common prefix and differ only in their
// last character, for prefix lengths at, just above, and at half of
// StringIndex::s_max_offset.
TEST_TYPES(StringIndex_FindAllNoCopyCommonPrefixStrings, string_column, nullable_string_column, enum_column,
           nullable_enum_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& col = fixture.get_column();
    const SearchIndex& ndx = *col.create_search_index();

    // Appends prefix+"b" once, prefix+"c" twice and prefix+"e" three times,
    // then checks the lookup result for each of them and for the absent
    // prefix+"d". Rows are counted from the column size at entry, so the
    // helper can be invoked repeatedly on the same column.
    auto check_prefix = [&](std::string prefix) {
        const std::string with_b = prefix + "b";
        const std::string with_c = prefix + "c";
        const std::string with_d = prefix + "d";
        const std::string with_e = prefix + "e";
        StringData view_b(with_b);
        StringData view_c(with_c);
        StringData view_d(with_d);
        StringData view_e(with_e);

        size_t first_row = col.size();
        col.add(view_b);
        for (int copies = 0; copies < 2; ++copies)
            col.add(view_c);
        for (int copies = 0; copies < 3; ++copies)
            col.add(view_e);

        InternalFindResult found;

        // One occurrence: single hit on the first new row.
        FindRes status = ndx.find_all_no_copy(view_b, found);
        CHECK_EQUAL(status, FindRes_single);
        CHECK_EQUAL(found.payload, first_row);

        // Two occurrences: a list holding the next two rows.
        status = ndx.find_all_no_copy(view_c, found);
        CHECK_EQUAL(status, FindRes_column);
        CHECK_EQUAL(found.end_ndx - found.start_ndx, 2);
        const IntegerColumn rows_c(Allocator::get_default(), ref_type(found.payload));
        CHECK_EQUAL(rows_c.get(found.start_ndx), first_row + 1);
        CHECK_EQUAL(rows_c.get(found.start_ndx + 1), first_row + 2);
        CHECK_EQUAL(col.get(size_t(rows_c.get(found.start_ndx))), view_c);
        CHECK_EQUAL(col.get(size_t(rows_c.get(found.start_ndx + 1))), view_c);

        // Never added.
        status = ndx.find_all_no_copy(view_d, found);
        CHECK_EQUAL(status, FindRes_not_found);

        // Three occurrences: a list holding the last three rows.
        status = ndx.find_all_no_copy(view_e, found);
        CHECK_EQUAL(status, FindRes_column);
        CHECK_EQUAL(found.end_ndx - found.start_ndx, 3);
        const IntegerColumn rows_e(Allocator::get_default(), ref_type(found.payload));
        CHECK_EQUAL(rows_e.get(found.start_ndx), first_row + 3);
        CHECK_EQUAL(rows_e.get(found.start_ndx + 1), first_row + 4);
        CHECK_EQUAL(rows_e.get(found.start_ndx + 2), first_row + 5);
        CHECK_EQUAL(col.get(size_t(rows_e.get(found.start_ndx))), view_e);
        CHECK_EQUAL(col.get(size_t(rows_e.get(found.start_ndx + 1))), view_e);
        CHECK_EQUAL(col.get(size_t(rows_e.get(found.start_ndx + 2))), view_e);
    };

    // Prefixes made of 'a' only: exactly s_max_offset long, one longer, and half as long.
    const std::string at_max(StringIndex::s_max_offset, 'a');
    const std::string above_max = at_max + "a";
    const std::string below_max(StringIndex::s_max_offset >> 1, 'a');

    check_prefix(at_max);
    check_prefix(above_max);
    check_prefix(below_max);
}
771

772
// The index's count() for every test integer must equal the number of times
// that integer occurs in the source data.
TEST(StringIndex_Count_Int)
{
    // Create a column with duplicate values
    column<Int> test_resources;
    // Bind to the fixture's helper instead of copying it.
    auto& col = test_resources.get_column();

    for (auto i : ints)
        col.add(i);

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    for (auto i : ints) {
        size_t count = ndx.count(i);

        // Reference count obtained by scanning the source data.
        size_t real = 0;
        for (auto j : ints) {
            if (i == j)
                real++;
        }

        CHECK_EQUAL(real, count);
    }
}
796

797

798
// An index over the test integers, which contain a repeated value, must
// report that duplicates exist.
TEST(StringIndex_Distinct_Int)
{
    // Create a column with duplicate values
    column<Int> test_resources;
    // Bind to the fixture's helper instead of copying it.
    auto& col = test_resources.get_column();

    for (auto i : ints)
        col.add(i);

    // Create a new index on column
    auto ndx = col.create_search_index();
    CHECK(ndx->has_duplicate_values());
}
811

812

813
// Runs set / erase / insert / add / clear against an indexed integer column
// and checks after each step that the index finds the expected row.
TEST(StringIndex_Set_Add_Erase_Insert_Int)
{
    column<Int> test_resources;
    // Bind to the fixture's helper instead of copying it.
    auto& col = test_resources.get_column();

    // Positions: [1, 2, 3, 2]
    col.add(1);
    col.add(2);
    col.add(3);
    col.add(2);

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();

    // The first 2 is at position 1.
    ObjKey f = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(1), f);

    // [1, 5, 3, 2]: the only 2 left is at position 3.
    col.set(1, 5);

    f = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(3), f);

    // [1, 3, 2]: the 2 moves down to position 2.
    col.erase(1);

    f = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(2), f);

    // [1, 5, 3, 2]: inserting at position 1 pushes the 2 back to position 3.
    col.insert(1, 5);
    CHECK_EQUAL(col.get(1), 5);

    f = ndx.find_first(int64_t(2));
    CHECK_EQUAL(col.key(3), f);

    // Append 7, then overwrite it with 10; the index must follow the change.
    col.add(7);
    CHECK_EQUAL(col.get(4), 7);
    col.set(4, 10);
    CHECK_EQUAL(col.get(4), 10);

    f = ndx.find_first(int64_t(10));
    CHECK_EQUAL(col.key(col.size() - 1), f);

    // A freshly appended value is found at the last position.
    col.add(9);
    f = ndx.find_first(int64_t(9));
    CHECK_EQUAL(col.key(col.size() - 1), f);

    // After clearing, nothing can be found any more.
    col.clear();
    f = ndx.find_first(int64_t(2));
    CHECK_EQUAL(null_key, f);
}
861

862
// Fills an indexed integer column with random values and compares indexed
// lookups against a linear scan, probing both values known to be present and
// freshly drawn random values.
TEST(StringIndex_FuzzyTest_Int)
{
    column<Int> test_resources;
    // Bind to the fixture's helper instead of copying it.
    auto& col = test_resources.get_column();
    Random random(random_int<unsigned long>());
    // Slightly more than one B+-tree node's worth of rows.
    const size_t n = static_cast<size_t>(1.2 * REALM_MAX_BPNODE_SIZE);

    // The index exists before any row is added, so every add() updates it.
    col.create_search_index();

    for (size_t t = 0; t < n; ++t) {
        col.add(random.draw_int_max(0xffffffffffffffff));
    }

    for (size_t t = 0; t < n; ++t) {
        // Probe either a value taken from the column or a new random one.
        int64_t r;
        if (random.draw_bool())
            r = col.get(t);
        else
            r = random.draw_int_max(0xffffffffffffffff);

        size_t m = col.find_first(r);

        // Reference answer: the first position holding `r`, found by scanning.
        bool present = false;
        for (size_t t_2 = 0; t_2 < n; ++t_2) {
            if (col.get(t_2) == r) {
                CHECK_EQUAL(t_2, m);
                present = true;
                break;
            }
        }
        // A value that is not in the column must be reported as not found.
        if (!present) {
            CHECK_EQUAL(not_found, m);
        }
    }
}
891

892
namespace {
893

894
// Generate string where the bit pattern in bits is converted to NUL bytes. E.g. (length=2):
895
// bits=0 -> "\0\0", bits=1 -> "\x\0", bits=2 -> "\0\x", bits=3 -> "\x\x", where x is a random byte
896
StringData create_string_with_nuls(const size_t bits, const size_t length, char* tmp, Random& random)
897
{
×
898
    for (size_t i = 0; i < length; ++i) {
×
899
        bool insert_nul_at_pos = (bits & (size_t(1) << i)) == 0;
×
900
        if (insert_nul_at_pos) {
×
901
            tmp[i] = '\0';
×
902
        }
×
903
        else {
×
904
            // Avoid stray \0 chars, since we are already testing all combinations.
905
            // All casts are necessary to preserve the bitpattern.
906
            tmp[i] = static_cast<char>(static_cast<unsigned char>(random.draw_int<unsigned int>(1, UCHAR_MAX)));
×
907
        }
×
908
    }
×
909
    return StringData(tmp, length);
×
910
}
×
911

912
} // anonymous namespace
913

914

915
// Test for generated strings of length 1..16 with all combinations of embedded NUL bytes
916
TEST_TYPES_IF(StringIndex_EmbeddedZeroesCombinations, TEST_DURATION > 1, string_column, nullable_string_column)
917
{
×
918
    TEST_TYPE test_resources;
×
919
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
×
920
    const SearchIndex& ndx = *col.create_search_index();
×
921

922
    constexpr unsigned int seed = 42;
×
923
    const size_t MAX_LENGTH = 16; // Test medium
×
924
    char tmp[MAX_LENGTH];         // this is a bit of a hack, that relies on the string being copied in column.add()
×
925

926
    for (size_t length = 1; length <= MAX_LENGTH; ++length) {
×
927

928
        {
×
929
            Random random(seed);
×
930
            const size_t combinations = size_t(1) << length;
×
931
            for (size_t i = 0; i < combinations; ++i) {
×
932
                StringData str = create_string_with_nuls(i, length, tmp, random);
×
933
                col.add(str);
×
934
            }
×
935
        }
×
936

937
        // check index up to this length
938
        size_t expected_index = 0;
×
939
        for (size_t l = 1; l <= length; ++l) {
×
940
            Random random(seed);
×
941
            const size_t combinations = size_t(1) << l;
×
942
            for (size_t i = 0; i < combinations; ++i) {
×
943
                StringData needle = create_string_with_nuls(i, l, tmp, random);
×
944
                CHECK_EQUAL(ndx.find_first(needle), col.key(expected_index));
×
945
                CHECK(strncmp(col.get(expected_index).data(), needle.data(), l) == 0);
×
946
                CHECK_EQUAL(col.get(expected_index).size(), needle.size());
×
947
                expected_index++;
×
948
            }
×
949
        }
×
950
    }
×
951
}
×
952

953
// Tests for a bug with strings containing zeroes
954
TEST_TYPES(StringIndex_EmbeddedZeroes, string_column, nullable_string_column, enum_column, nullable_enum_column)
955
{
8✔
956
    TEST_TYPE test_resources;
8✔
957
    typename TEST_TYPE::ColumnTestType& col2 = test_resources.get_column();
8✔
958
    const SearchIndex& ndx2 = *col2.create_search_index();
8✔
959

960
    // FIXME: re-enable once embedded nuls work
961
    col2.add(StringData("\0", 1));
8✔
962
    col2.add(StringData("\1", 1));
8✔
963
    col2.add(StringData("\0\0", 2));
8✔
964
    col2.add(StringData("\0\1", 2));
8✔
965
    col2.add(StringData("\1\0", 2));
8✔
966

967
    CHECK_EQUAL(ndx2.find_first(StringData("\0", 1)), col2.key(0));
8✔
968
    CHECK_EQUAL(ndx2.find_first(StringData("\1", 1)), col2.key(1));
8✔
969
    CHECK_EQUAL(ndx2.find_first(StringData("\2", 1)), null_key);
8✔
970
    CHECK_EQUAL(ndx2.find_first(StringData("\0\0", 2)), col2.key(2));
8✔
971
    CHECK_EQUAL(ndx2.find_first(StringData("\0\1", 2)), col2.key(3));
8✔
972
    CHECK_EQUAL(ndx2.find_first(StringData("\1\0", 2)), col2.key(4));
8✔
973
    CHECK_EQUAL(ndx2.find_first(StringData("\1\0\0", 3)), null_key);
8✔
974

975
    // Integer index (uses String index internally)
976
    int64_t v = 1ULL << 41;
8✔
977
    column<Int> test_resources_1;
8✔
978
    auto col = test_resources_1.get_column();
8✔
979
    const SearchIndex& ndx = *col.create_search_index();
8✔
980
    col.add(1ULL << 40);
8✔
981
    auto f = ndx.find_first(v);
8✔
982
    CHECK_EQUAL(f, null_key);
8✔
983
}
8✔
984

985
// A null lookup must find the null row, not the row holding the empty string.
TEST_TYPES(StringIndex_Null, nullable_string_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    // Row 0: empty string, row 1: null
    col.add("");
    col.add(realm::null());

    // The index is created after the values are in place
    const SearchIndex& ndx = *col.create_search_index();

    auto null_row = ndx.find_first(realm::null());
    CHECK_EQUAL(null_row, col.key(1));
}
4✔
998

999

1000
// StringIndex could crash if strings ended with one or more 0-bytes.
// Stores "", "\0" and "\0\0", indexes the column and looks each value up again.
TEST_TYPES(StringIndex_Zero_Crash, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    const bool nullable = TEST_TYPE::is_nullable();

    Table table;
    auto col = table.add_column(type_String, "strings", nullable);

    const StringData empty("");
    const StringData one_nul("\0", 1);
    const StringData two_nuls("\0\0", 2);

    auto k0 = table.create_object().set(col, empty).get_key();
    auto k1 = table.create_object().set(col, one_nul).get_key();
    auto k2 = table.create_object().set(col, two_nuls).get_key();
    table.add_search_index(col);

    if (TEST_TYPE::is_enumerated()) {
        table.enumerate_string_column(col);
    }

    ObjKey found = table.find_first_string(col, StringData(""));
    CHECK_EQUAL(k0, found);

    found = table.find_first_string(col, StringData("\0", 1));
    CHECK_EQUAL(k1, found);

    found = table.find_first_string(col, StringData("\0\0", 2));
    CHECK_EQUAL(k2, found);
}
8✔
1027

1028
TEST_TYPES(StringIndex_Zero_Crash2, std::true_type, std::false_type)
1029
{
4✔
1030
    Random random(random_int<unsigned long>());
4✔
1031

1032
    constexpr bool add_common_prefix = TEST_TYPE::value;
4✔
1033

1034
    for (size_t iter = 0; iter < 10 + TEST_DURATION * 100; iter++) {
44✔
1035
        // StringIndex could crash if strings ended with one or more 0-bytes
1036
        Table table;
40✔
1037
        auto col = table.add_column(type_String, "string", true);
40✔
1038

1039
        table.add_search_index(col);
40✔
1040

1041
        for (size_t i = 0; i < 100 + TEST_DURATION * 1000; i++) {
4,040✔
1042
            unsigned char action = static_cast<unsigned char>(random.draw_int_max<unsigned int>(100));
4,000✔
1043
            if (action == 0) {
4,000✔
1044
                table.remove_search_index(col);
39✔
1045
                table.add_search_index(col);
39✔
1046
            }
39✔
1047
            else if (action > 48 && table.size() < 10) {
3,961✔
1048
                // Generate string with equal probability of being empty, null, short, medium and long, and with
1049
                // their contents having equal proability of being either random or a duplicate of a previous
1050
                // string. When it's random, each char must have equal probability of being 0 or non-0e
1051
                static std::string buf =
1,969✔
1052
                    "This string is around 90 bytes long, which falls in the long-string type of Realm strings";
1,969✔
1053

1054
                std::string copy = buf;
1,969✔
1055

1056
                static std::string buf2 =
1,969✔
1057
                    "                                                                                         ";
1,969✔
1058
                std::string copy2 = buf2;
1,969✔
1059
                StringData sd;
1,969✔
1060

1061
                size_t len = random.draw_int_max<size_t>(3);
1,969✔
1062
                if (len == 0)
1,969✔
1063
                    len = 0;
489✔
1064
                else if (len == 1)
1,480✔
1065
                    len = 7;
492✔
1066
                else if (len == 2)
988✔
1067
                    len = 27;
484✔
1068
                else
504✔
1069
                    len = random.draw_int_max<size_t>(90);
504✔
1070

1071
                copy = copy.substr(0, len);
1,969✔
1072
                if (add_common_prefix) {
1,969✔
1073
                    std::string prefix(StringIndex::s_max_offset, 'a');
974✔
1074
                    copy = prefix + copy;
974✔
1075
                }
974✔
1076

1077
                if (random.draw_int_max<int>(1) == 0) {
1,969✔
1078
                    // duplicate string
1079
                    sd = StringData(copy);
969✔
1080
                }
969✔
1081
                else {
1,000✔
1082
                    // random string
1083
                    for (size_t t = 0; t < len; t++) {
20,798✔
1084
                        if (random.draw_int_max<int>(100) > 20)
19,798✔
1085
                            copy2[t] = 0; // zero byte
15,606✔
1086
                        else
4,192✔
1087
                            copy2[t] = static_cast<char>(random.draw_int<int>()); // random byte
4,192✔
1088
                    }
19,798✔
1089
                    // no generated string can equal "null" (our vector magic value for null) because
1090
                    // len == 4 is not possible
1091
                    copy2 = copy2.substr(0, len);
1,000✔
1092
                    if (add_common_prefix) {
1,000✔
1093
                        std::string prefix(StringIndex::s_max_offset, 'a');
479✔
1094
                        copy2 = prefix + copy2;
479✔
1095
                    }
479✔
1096
                    sd = StringData(copy2);
1,000✔
1097
                }
1,000✔
1098

1099
                bool done = false;
1,969✔
1100
                do {
1,969✔
1101
                    int64_t key_val = random.draw_int_max<int64_t>(10000);
1,969✔
1102
                    try {
1,969✔
1103
                        table.create_object(ObjKey(key_val)).set(col, sd);
1,969✔
1104
                        done = true;
1,969✔
1105
                    }
1,969✔
1106
                    catch (...) {
1,969✔
1107
                    }
×
1108
                } while (!done);
1,969✔
1109
                table.verify();
1,969✔
1110
            }
1,969✔
1111
            else if (table.size() > 0) {
1,992✔
1112
                // delete
1113
                size_t row = random.draw_int_max<size_t>(table.size() - 1);
1,760✔
1114
                Obj obj = table.get_object(row);
1,760✔
1115
                obj.remove();
1,760✔
1116
            }
1,760✔
1117

1118
            action = static_cast<unsigned char>(random.draw_int_max<unsigned int>(100));
4,000✔
1119
            if (table.size() > 0) {
4,000✔
1120
                // Search for value that exists
1121
                size_t row = random.draw_int_max<size_t>(table.size() - 1);
3,557✔
1122
                Obj obj = table.get_object(row);
3,557✔
1123
                StringData sd = obj.get<String>(col);
3,557✔
1124
                ObjKey t = table.find_first_string(col, sd);
3,557✔
1125
                StringData sd2 = table.get_object(t).get<String>(col);
3,557✔
1126
                CHECK_EQUAL(sd, sd2);
3,557✔
1127
            }
3,557✔
1128
        }
4,000✔
1129
    }
40✔
1130
}
4✔
1131

1132
// Inserts random integers into an indexed Int column and checks that the indexed
// count of every stored value matches a std::multiset reference.
TEST(StringIndex_Integer_Increasing)
{
    const size_t rows = 2000 + 1000000 * TEST_DURATION;

    Table table;
    auto col = table.add_column(type_Int, "int");
    table.add_search_index(col);

    // Reference bookkeeping of every inserted value, duplicates included
    std::multiset<int64_t> reference;

    size_t inserted = 0;
    while (inserted < rows) {
        const int64_t value = fastrand((TEST_DURATION == 0) ? 2000 : 0x100000);
        table.create_object().set(col, value);
        reference.insert(value);
        ++inserted;
    }

    for (auto obj : table) {
        const int64_t value = obj.get<Int>(col);
        const size_t indexed_count = table.count_int(col, value);
        const size_t expected_count = reference.count(value);
        CHECK_EQUAL(indexed_count, expected_count);
    }
}
2✔
1156

1157
// Exercises SearchIndex::has_duplicate_values() while values are added, erased,
// inserted, overwritten and cleared.
TEST_TYPES(StringIndex_Duplicate_Values, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    col.add(s1);
    col.add(s2);
    col.add(s3);
    col.add(s4);

    // Index is created on the already populated column
    const SearchIndex& ndx = *col.create_search_index();
    CHECK(!ndx.has_duplicate_values());

    // Appending s1 again creates a duplicate
    col.add(s1);
    CHECK(ndx.has_duplicate_values());

    // Erasing either copy of s1 removes the duplicate again
    col.erase(4);
    CHECK(!ndx.has_duplicate_values());
    col.add(s1);
    CHECK(ndx.has_duplicate_values());
    col.erase(0);
    CHECK(!ndx.has_duplicate_values());
    col.clear();

    // An emptied column has an empty index without duplicates
    CHECK(ndx.is_empty());
    CHECK(!ndx.has_duplicate_values());

    const size_t num_rows = 100;

    // The decimal strings "0".."99"
    for (size_t n = 0; n < num_rows; ++n) {
        std::string number(util::to_string(n));
        col.add(number);
    }
    CHECK(!ndx.has_duplicate_values());

    // Runs of 'a' with lengths 1..100; the run grows by one after every add
    std::string a_string = "a";
    for (size_t n = 0; n < num_rows; ++n) {
        col.add(a_string);
        a_string += "a";
    }
    CHECK(!ndx.has_duplicate_values());

    // a_string is now one longer than anything stored, so adding it twice makes one duplicate
    col.add(a_string);
    col.add(a_string);
    CHECK(ndx.has_duplicate_values());
    col.erase(col.size() - 1);
    CHECK(!ndx.has_duplicate_values());

    // Insert into the middle unique value of num_rows
    std::string str_num_rows(util::to_string(num_rows));
    const size_t middle = num_rows / 2;
    col.insert(middle, str_num_rows);
    CHECK(!ndx.has_duplicate_values());

    // Set the next element to be num_rows too
    col.set(middle + 1, str_num_rows);
    CHECK(ndx.has_duplicate_values());

    col.clear();
    CHECK(!ndx.has_duplicate_values());
    CHECK(col.size() == 0);
}
8✔
1224

1225
// Adds varying numbers of identical strings whose lengths are below, at and above
// StringIndex::s_max_offset and checks count, find_first and duplicate detection.
TEST_TYPES(StringIndex_MaxBytes, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    // Backing storage for the StringData views below
    std::string std_max(StringIndex::s_max_offset, 'a');
    std::string std_over_max(std_max + "a");
    std::string std_under_max(StringIndex::s_max_offset >> 1, 'a');
    StringData max(std_max);
    StringData over_max(std_over_max);
    StringData under_max(std_under_max);

    const SearchIndex& ndx = *col.create_search_index();

    CHECK_EQUAL(col.size(), 0);

    // Fills the (empty) column with num_dups copies of s, verifies it, and empties it again
    auto duplicate_check = [&](size_t num_dups, StringData s) {
        CHECK(col.size() == 0);
        size_t added = 0;
        while (added < num_dups) {
            col.add(s);
            ++added;
        }
        CHECK_EQUAL(col.size(), num_dups);
        const bool expect_duplicates = num_dups > 1;
        CHECK(ndx.has_duplicate_values() == expect_duplicates);
        CHECK_EQUAL(col.get(0), s);
        CHECK_EQUAL(col.count(s), num_dups);
        CHECK_EQUAL(col.find_first(s), 0);
        col.clear();
    };

    // Duplicate counts around REALM_MAX_BPNODE_SIZE, plus two small ones
    const size_t duplicate_counts[] = {
        1, 10, REALM_MAX_BPNODE_SIZE - 1, REALM_MAX_BPNODE_SIZE, REALM_MAX_BPNODE_SIZE + 1,
    };
    const StringData candidates[] = {under_max, max, over_max};

    for (const size_t dups : duplicate_counts) {
        for (const StringData& candidate : candidates) {
            duplicate_check(dups, candidate);
        }
    }
}
8✔
1263

1264

1265
// There is a corner case where two very long strings are
1266
// inserted into the string index which are identical except
1267
// for the characters at the end (they have an identical very
1268
// long prefix). This was causing a stack overflow because of
1269
// the recursive nature of the insert function.
1270
TEST_TYPES(StringIndex_InsertLongPrefix, string_column, nullable_string_column, enum_column, nullable_enum_column)
1271
{
8✔
1272
    TEST_TYPE test_resources;
8✔
1273
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1274
    const SearchIndex& ndx = *col.create_search_index();
8✔
1275

1276
    col.add("test_index_string1");
8✔
1277
    col.add("test_index_string2");
8✔
1278

1279
    CHECK_EQUAL(col.find_first("test_index_string1"), 0);
8✔
1280
    CHECK_EQUAL(col.find_first("test_index_string2"), 1);
8✔
1281

1282
    std::string std_base(107, 'a');
8✔
1283
    std::string std_base_b = std_base + "b";
8✔
1284
    std::string std_base_c = std_base + "c";
8✔
1285
    StringData base_b(std_base_b);
8✔
1286
    StringData base_c(std_base_c);
8✔
1287
    col.add(base_b);
8✔
1288
    ndx.verify();
8✔
1289
    col.add(base_c);
8✔
1290
    ndx.verify();
8✔
1291

1292
    CHECK_EQUAL(col.find_first(base_b), 2);
8✔
1293
    CHECK_EQUAL(col.find_first(base_c), 3);
8✔
1294

1295
    // To trigger the bug, the length must be more than 10000.
1296
    // Array::destroy_deep() will stack overflow at around recursion depths of
1297
    // lengths > 90000 on mac and less on android devices.
1298
    std::string std_base2(100000, 'a');
8✔
1299
    std::string std_base2_b = std_base2 + "b";
8✔
1300
    std::string std_base2_c = std_base2 + "c";
8✔
1301
    StringData base2(std_base2);
8✔
1302
    StringData base2_b(std_base2_b);
8✔
1303
    StringData base2_c(std_base2_c);
8✔
1304
    col.add(base2_b);
8✔
1305
    ndx.verify();
8✔
1306
    col.add(base2_c);
8✔
1307
    ndx.verify();
8✔
1308

1309
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1310
    CHECK_EQUAL(col.find_first(base2_c), 5);
8✔
1311

1312
    col.add(base2);
8✔
1313
    CHECK(!ndx.has_duplicate_values());
8✔
1314
    ndx.verify();
8✔
1315
    col.add(base2_b); // adds a duplicate in the middle of the list
8✔
1316

1317
    CHECK(ndx.has_duplicate_values());
8✔
1318
    std::vector<ObjKey> find_all_result;
8✔
1319
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1320
    ndx.find_all(find_all_result, base2_b);
8✔
1321
    CHECK_EQUAL(find_all_result.size(), 2);
8✔
1322
    CHECK_EQUAL(find_all_result[0], col.key(4));
8✔
1323
    CHECK_EQUAL(find_all_result[1], col.key(7));
8✔
1324
    find_all_result.clear();
8✔
1325
    CHECK_EQUAL(ndx.count(base2_b), 2);
8✔
1326
    col.verify();
8✔
1327

1328
    col.erase(7);
8✔
1329
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1330
    CHECK_EQUAL(ndx.count(base2_b), 1);
8✔
1331
    ndx.find_all(find_all_result, base2_b);
8✔
1332
    CHECK_EQUAL(find_all_result.size(), 1);
8✔
1333
    CHECK_EQUAL(find_all_result[0], col.key(4));
8✔
1334
    find_all_result.clear();
8✔
1335
    col.verify();
8✔
1336

1337
    col.set(6, base2_b);
8✔
1338
    CHECK_EQUAL(ndx.count(base2_b), 2);
8✔
1339
    CHECK_EQUAL(col.find_first(base2_b), 4);
8✔
1340
    ndx.find_all(find_all_result, base2_b);
8✔
1341
    CHECK_EQUAL(find_all_result.size(), 2);
8✔
1342
    CHECK_EQUAL(find_all_result[0], col.key(4));
8✔
1343
    CHECK_EQUAL(find_all_result[1], col.key(6));
8✔
1344
    col.verify();
8✔
1345

1346
    col.clear(); // calls recursive function Array::destroy_deep()
8✔
1347
}
8✔
1348

1349
// Many rows share a prefix of StringIndex::s_max_offset characters and differ only in a
// short suffix; find, find_all and count must still tell present values from absent ones.
TEST_TYPES(StringIndex_InsertLongPrefixAndQuery, string_column, nullable_string_column, enum_column,
           nullable_enum_column)
{
    constexpr int half_node_size = REALM_MAX_BPNODE_SIZE / 2;
    constexpr int copies_per_value = half_node_size * 3;
    const bool nullable_column = TEST_TYPE::is_nullable();

    Group g;
    auto t = g.add_table("StringsOnly");
    auto col = t->add_column(type_String, "first", nullable_column);
    t->add_search_index(col);

    const std::string base(StringIndex::s_max_offset, 'a');
    std::string str_a = base + "aaaaa";
    std::string str_a0 = base + "aaaa0";
    std::string str_ax = base + "aaaax";
    std::string str_b = base + "bbbbb";
    std::string str_c = base + "ccccc";
    std::string str_c0 = base + "cccc0"; // never stored
    std::string str_cx = base + "ccccx"; // never stored

    // Interleave the three main values so that str_a ends up in the very first row
    for (int round = 0; round < copies_per_value; round++) {
        t->create_object().set(col, str_a);
        t->create_object().set(col, str_b);
        t->create_object().set(col, str_c);
    }
    t->create_object().set(col, str_ax);
    t->create_object().set(col, str_ax);
    t->create_object().set(col, str_a0);
    /*
    {
        std::ofstream o("index.dot");
        index->to_dot(o, "");
    }
    */
    if (TEST_TYPE::is_enumerated()) {
        t->enumerate_string_column(col);
    }

    // A value that is present: first match, count and full result set
    auto first_match = t->where().equal(col, StringData(str_a)).find();
    auto cnt = t->count_string(col, StringData(str_a));
    auto matches = t->where().equal(col, StringData(str_a)).find_all();
    CHECK_EQUAL(first_match, ObjKey(0));
    CHECK_EQUAL(cnt, copies_per_value);
    CHECK_EQUAL(matches.size(), copies_per_value);

    // Values that are absent must not be found by find()
    first_match = t->where().equal(col, StringData(str_c0)).find();
    CHECK_EQUAL(first_match, null_key);
    first_match = t->where().equal(col, StringData(str_cx)).find();
    CHECK_EQUAL(first_match, null_key);

    // Find string that is 'less' than strings in the table, but with identical last key
    matches = t->where().equal(col, StringData(str_c0)).find_all();
    CHECK_EQUAL(matches.size(), 0);
    // Find string that is 'greater' than strings in the table, but with identical last key
    matches = t->where().equal(col, StringData(str_cx)).find_all();
    CHECK_EQUAL(matches.size(), 0);

    // Same as above, but just for 'count' method
    cnt = t->count_string(col, StringData(str_c0));
    CHECK_EQUAL(cnt, 0);
    cnt = t->count_string(col, StringData(str_cx));
    CHECK_EQUAL(cnt, 0);
}
8✔
1408

1409

1410
// Two string columns receive identical data, but only the first one is indexed.
// Every query is run against both columns and the two result sets must agree.
TEST(StringIndex_Fuzzy)
{
    constexpr size_t chunkcount = 50;
    constexpr size_t rowcount = 100 + 1000 * TEST_DURATION;

    for (size_t main_rounds = 0; main_rounds < 2 + 10 * TEST_DURATION; main_rounds++) {

        Group g;

        auto t = g.add_table("StringsOnly");
        auto col0 = t->add_column(type_String, "first");
        auto col1 = t->add_column(type_String, "second");

        t->add_search_index(col0);

        // Pool of random chunks that the test strings are assembled from
        std::string strings[chunkcount];
        for (size_t j = 0; j < chunkcount; j++) {
            const size_t len = fastrand() % REALM_MAX_BPNODE_SIZE;
            std::string& chunk = strings[j];
            for (size_t i = 0; i < len; i++) {
                chunk += char(fastrand());
            }
        }

        // Concatenates 0..3 random chunks from the pool.
        // Strings consisting of 2 concatenated strings are very interesting, so
        // that count is chosen half of the time.
        auto random_concat = [&]() {
            size_t chunks;
            if (fastrand() % 2 == 0)
                chunks = fastrand() % 4;
            else
                chunks = 2;

            std::string joined;
            for (size_t c = 0; c < chunks; c++) {
                joined += strings[fastrand() % chunkcount];
            }
            return joined;
        };

        // The indexed and the unindexed query must return the same keys in the same order
        auto check_same_results = [&](TableView& indexed, TableView& plain) {
            CHECK_EQUAL(indexed.size(), plain.size());

            for (size_t v = 0; v < indexed.size(); v++) {
                CHECK_EQUAL(indexed.get_key(v), plain.get_key(v));
            }
        };

        for (size_t rows = 0; rows < rowcount; rows++) {
            std::string str = random_concat();
            t->create_object().set_all(str, str);
        }

        for (size_t rounds = 0; rounds < 1 + 10 * TEST_DURATION; rounds++) {
            // Query for every value that is stored in the table
            for (auto obj : *t) {
                TableView tv0 = (t->column<String>(col0) == obj.get<String>(col0)).find_all();
                TableView tv1 = (t->column<String>(col1) == obj.get<String>(col1)).find_all();
                check_same_results(tv0, tv1);
            }

            // Query for freshly generated values, which may or may not be stored
            for (size_t r = 0; r < 5 + 1000 * TEST_DURATION; r++) {
                std::string str = random_concat();

                TableView tv0 = (t->column<String>(col0) == str).find_all();
                TableView tv1 = (t->column<String>(col1) == str).find_all();
                check_same_results(tv0, tv1);
            }

            // Mutate the table a little before the next round
            if (t->size() > 10) {
                t->get_object(0).remove();
            }

            const size_t r1 = fastrand() % t->size();
            const size_t r2 = fastrand() % t->size();

            // Copy the value of row r2 into both columns of row r1
            std::string str = t->get_object(r2).get<String>(col0);
            Obj obj = t->get_object(r1);
            obj.set<String>(col0, StringData(str));
            obj.set<String>(col1, StringData(str));
        }
    }
}
2✔
1500

1501
namespace {
1502

1503
// results returned by the index should be in ascending row order
1504
// this requirement is assumed by the query system which runs find_gte
1505
// and this will return wrong results unless the results are ordered
1506
void check_result_order(const std::vector<ObjKey>& results, TestContext& test_context)
1507
{
88✔
1508
    const size_t num_results = results.size();
88✔
1509
    for (size_t i = 1; i < num_results; ++i) {
288✔
1510
        CHECK(results[i - 1] < results[i]);
200✔
1511
    }
200✔
1512
}
88✔
1513

1514
} // end anonymous namespace
1515

1516

1517
TEST_TYPES(StringIndex_Insensitive, string_column, nullable_string_column, enum_column, nullable_enum_column)
1518
{
8✔
1519
    TEST_TYPE test_resources;
8✔
1520
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1521

1522
    const char* strings[] = {"john",
8✔
1523
                             "John",
8✔
1524
                             "jOhn",
8✔
1525
                             "JOhn",
8✔
1526
                             "joHn",
8✔
1527
                             "JoHn",
8✔
1528
                             "jOHn",
8✔
1529
                             "JOHn",
8✔
1530
                             "johN",
8✔
1531
                             "JohN",
8✔
1532
                             "jOhN",
8✔
1533
                             "JOhN",
8✔
1534
                             "joHN",
8✔
1535
                             "JoHN",
8✔
1536
                             "jOHN",
8✔
1537
                             "JOHN",
8✔
1538
                             "john" /* yes, an extra to test the "bucket" case as well */,
8✔
1539
                             "hans",
8✔
1540
                             "Hansapark",
8✔
1541
                             "george",
8✔
1542
                             "billion dollar startup",
8✔
1543
                             "abcde",
8✔
1544
                             "abcdE",
8✔
1545
                             "Abcde",
8✔
1546
                             "AbcdE",
8✔
1547
                             "common",
8✔
1548
                             "common"};
8✔
1549

1550
    for (const char* string : strings) {
216✔
1551
        col.add(string);
216✔
1552
    }
216✔
1553

1554
    // Generate 255 strings with 1..255 'a' chars
1555
    for (int i = 1; i < 256; ++i) {
2,048✔
1556
        col.add(std::string(i, 'a').c_str());
2,040✔
1557
    }
2,040✔
1558

1559
    // Create a new index on column
1560
    const SearchIndex& ndx = *col.create_search_index();
8✔
1561

1562
    std::vector<ObjKey> results;
8✔
1563
    {
8✔
1564
        // case sensitive
1565
        ndx.find_all(results, strings[0]);
8✔
1566
        CHECK_EQUAL(2, results.size());
8✔
1567
        CHECK_EQUAL(col.get(results[0]), strings[0]);
8✔
1568
        CHECK_EQUAL(col.get(results[1]), strings[0]);
8✔
1569
        check_result_order(results, test_context);
8✔
1570
        results.clear();
8✔
1571
    }
8✔
1572

1573
    {
8✔
1574
        constexpr bool case_insensitive = true;
8✔
1575
        const char* needle = "john";
8✔
1576
        auto upper_needle = case_map(needle, true);
8✔
1577
        ndx.find_all(results, needle, case_insensitive);
8✔
1578
        CHECK_EQUAL(17, results.size());
8✔
1579
        for (size_t i = 0; i < results.size(); ++i) {
144✔
1580
            auto upper_result = case_map(col.get(results[i]), true);
136✔
1581
            CHECK_EQUAL(upper_result, upper_needle);
136✔
1582
        }
136✔
1583
        check_result_order(results, test_context);
8✔
1584
        results.clear();
8✔
1585
    }
8✔
1586

1587

1588
    {
8✔
1589
        struct TestData {
8✔
1590
            const bool case_insensitive;
8✔
1591
            const char* const needle;
8✔
1592
            const size_t result_size;
8✔
1593
        };
8✔
1594

1595
        TestData td[] = {
8✔
1596
            {true, "Hans", 1},
8✔
1597
            {true, "Geor", 0},
8✔
1598
            {true, "George", 1},
8✔
1599
            {true, "geoRge", 1},
8✔
1600
            {true, "Billion Dollar Startup", 1},
8✔
1601
            {true, "ABCDE", 4},
8✔
1602
            {true, "commON", 2},
8✔
1603
        };
8✔
1604

1605
        for (const TestData& t : td) {
56✔
1606
            ndx.find_all(results, t.needle, t.case_insensitive);
56✔
1607
            CHECK_EQUAL(t.result_size, results.size());
56✔
1608
            check_result_order(results, test_context);
56✔
1609
            results.clear();
56✔
1610
        }
56✔
1611
    }
8✔
1612

1613
    // Test generated 'a'-strings
1614
    for (int i = 1; i < 256; ++i) {
2,048✔
1615
        const std::string str = std::string(i, 'A');
2,040✔
1616
        ndx.find_all(results, str.c_str(), false);
2,040✔
1617
        CHECK_EQUAL(0, results.size());
2,040✔
1618
        ndx.find_all(results, str.c_str(), true);
2,040✔
1619
        CHECK_EQUAL(1, results.size());
2,040✔
1620
        results.clear();
2,040✔
1621
    }
2,040✔
1622
}
8✔
1623

1624

1625
/* Disabled until we have better support for case mapping unicode characters
1626

1627
TEST_TYPES(StringIndex_Insensitive_Unicode, non_nullable, nullable)
1628
{
1629
    constexpr bool nullable = TEST_TYPE::value;
1630

1631
    // Create a column with string values
1632
    ref_type ref = StringColumn::create(Allocator::get_default());
1633
    StringColumn col(Allocator::get_default(), ref, nullable);
1634

1635
    const char* strings[] = {
1636
        "æøå", "ÆØÅ",
1637
    };
1638

1639
    for (const char* string : strings) {
1640
        col.add(string);
1641
    }
1642

1643
    // Create a new index on column
1644
    const SearchIndex& ndx = *col.create_search_index();
1645

1646
    ref_type results_ref = IntegerColumn::create(Allocator::get_default());
1647
    IntegerColumn results(Allocator::get_default(), results_ref);
1648

1649
    {
1650
        struct TestData {
1651
            const bool case_insensitive;
1652
            const char* const needle;
1653
            const size_t result_size;
1654
        };
1655

1656
        TestData td[] = {
1657
            {false, "æøå", 1},
1658
            {false, "ÆØÅ", 1},
1659
            {true, "æøå", 2},
1660
            {true, "Æøå", 2},
1661
            {true, "æØå", 2},
1662
            {true, "ÆØå", 2},
1663
            {true, "æøÅ", 2},
1664
            {true, "ÆøÅ", 2},
1665
            {true, "æØÅ", 2},
1666
            {true, "ÆØÅ", 2},
1667
        };
1668

1669
        for (const TestData& t : td) {
1670
            ndx.find_all(results, t.needle, t.case_insensitive);
1671
            CHECK_EQUAL(t.result_size, results.size());
1672
            results.clear();
1673
        }
1674
    }
1675

1676
    // Clean up
1677
    results.destroy();
1678
    col.destroy();
1679
}
1680

1681
*/
1682

1683

1684
// A case-insensitive search for five 'A's must not match rows that hold four 'a's.
TEST_TYPES(StringIndex_45, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
    const SearchIndex& ndx = *col.create_search_index();

    std::string four_lower(4, 'a');
    std::string five_upper(5, 'A');

    // Two identical rows of "aaaa"
    col.add(four_lower);
    col.add(four_lower);

    std::vector<ObjKey> matches;
    ndx.find_all(matches, five_upper.c_str(), true);
    CHECK_EQUAL(matches.size(), 0);
}
8✔
1700

1701

1702
namespace {
1703

1704
std::string create_random_a_string(size_t max_len)
1705
{
×
1706
    std::string s;
×
1707
    size_t len = size_t(fastrand(max_len));
×
1708
    for (size_t p = 0; p < len; p++) {
×
1709
        s += fastrand(1) == 0 ? 'a' : 'A';
×
1710
    }
×
1711
    return s;
×
1712
}
×
1713

1714
} // namespace
1715

1716

1717
// Excluded when run with valgrind because it takes a long time
1718
TEST_TYPES_IF(StringIndex_Insensitive_Fuzz, TEST_DURATION > 1, string_column, nullable_string_column, enum_column,
1719
              nullable_enum_column)
1720
{
×
1721
    const size_t max_str_len = 9;
×
1722
    const size_t iters = 3;
×
1723

1724
    for (size_t iter = 0; iter < iters; iter++) {
×
1725
        TEST_TYPE test_resources;
×
1726
        typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
×
1727

1728
        size_t rows = size_t(fastrand(2 * REALM_MAX_BPNODE_SIZE - 1));
×
1729

1730
        // Add 'rows' number of rows in the column
1731
        for (size_t t = 0; t < rows; t++) {
×
1732
            std::string str = create_random_a_string(max_str_len);
×
1733
            col.add(str);
×
1734
        }
×
1735

1736
        const SearchIndex& ndx = *col.create_search_index();
×
1737

1738
        for (size_t t = 0; t < 1000; t++) {
×
1739
            std::string needle = create_random_a_string(max_str_len);
×
1740

1741
            std::vector<ObjKey> res;
×
1742

1743
            ndx.find_all(res, needle.c_str(), true);
×
1744
            check_result_order(res, test_context);
×
1745

1746
            // Check that all items in 'res' point at a match in 'col'
1747
            auto needle_upper = case_map(needle, true);
×
1748
            for (size_t res_ndx = 0; res_ndx < res.size(); res_ndx++) {
×
1749
                auto res_upper = case_map(col.get(res[res_ndx]), true);
×
1750
                CHECK_EQUAL(res_upper, needle_upper);
×
1751
            }
×
1752

1753
            // Check that all matches in 'col' exist in 'res'
1754
            for (size_t col_ndx = 0; col_ndx < col.size(); col_ndx++) {
×
1755
                auto str_upper = case_map(col.get(col_ndx), true);
×
1756
                if (str_upper == needle_upper) {
×
1757
                    CHECK(std::find(res.begin(), res.end(), col.key(col_ndx)) != res.end());
×
1758
                }
×
1759
            }
×
1760
        }
×
1761
    }
×
1762
}
×
1763

1764
// Exercise the StringIndex case insensitive search for strings with very long, common prefixes
1765
// to cover the special case code paths where different strings are stored in a list.
1766
TEST_TYPES(StringIndex_Insensitive_VeryLongStrings, string_column, nullable_string_column, enum_column,
1767
           nullable_enum_column)
1768
{
8✔
1769
    TEST_TYPE test_resources;
8✔
1770
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1771
    const SearchIndex& ndx = *col.create_search_index();
8✔
1772

1773
    std::string long1 = std::string(StringIndex::s_max_offset + 10, 'a');
8✔
1774
    std::string long2 = long1 + "b";
8✔
1775
    std::string long3 = long1 + "c";
8✔
1776

1777
    // Add the strings in a "random" order
1778
    col.add(long1);
8✔
1779
    col.add(long2);
8✔
1780
    col.add(long2);
8✔
1781
    col.add(long1);
8✔
1782
    col.add(long3);
8✔
1783
    col.add(long2);
8✔
1784
    col.add(long1);
8✔
1785
    col.add(long1);
8✔
1786

1787
    std::vector<ObjKey> results;
8✔
1788

1789
    ndx.find_all(results, long1.c_str(), true);
8✔
1790
    CHECK_EQUAL(results.size(), 4);
8✔
1791
    check_result_order(results, test_context);
8✔
1792
    results.clear();
8✔
1793
    ndx.find_all(results, long2.c_str(), true);
8✔
1794
    CHECK_EQUAL(results.size(), 3);
8✔
1795
    results.clear();
8✔
1796
    ndx.find_all(results, long3.c_str(), true);
8✔
1797
    CHECK_EQUAL(results.size(), 1);
8✔
1798
    results.clear();
8✔
1799
}
8✔
1800

1801

1802
// Bug with case insensitive search on numbers that gives duplicate results
1803
TEST_TYPES(StringIndex_Insensitive_Numbers, string_column, nullable_string_column, enum_column, nullable_enum_column)
1804
{
8✔
1805
    TEST_TYPE test_resources;
8✔
1806
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
8✔
1807
    const SearchIndex& ndx = *col.create_search_index();
8✔
1808

1809
    constexpr const char* number_string_16 = "1111111111111111";
8✔
1810
    constexpr const char* number_string_17 = "11111111111111111";
8✔
1811

1812
    col.add(number_string_16);
8✔
1813
    col.add(number_string_17);
8✔
1814

1815
    std::vector<ObjKey> results;
8✔
1816

1817
    ndx.find_all(results, number_string_16, true);
8✔
1818
    CHECK_EQUAL(results.size(), 1);
8✔
1819
}
8✔
1820

1821

1822
// A case-insensitive search for "rover" must find both "ROVER" and "Rover",
// neither of which is byte-identical to the needle, and the results must
// satisfy check_result_order().
TEST_TYPES(StringIndex_Rover, string_column, nullable_string_column, enum_column, nullable_enum_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
    const SearchIndex& ndx = *col.create_search_index();

    // Same word in two different casings.
    for (const char* name : {"ROVER", "Rover"}) {
        col.add(name);
    }

    std::vector<ObjKey> results;
    ndx.find_all(results, "rover", true);

    CHECK_EQUAL(results.size(), 2);
    check_result_order(results, test_context);
}
8✔
1838

1839
// Equality queries on a string primary-key column of a table holding exactly
// one object: the stored key must be found once, an absent key not at all.
TEST(StringIndex_QuerySingleObject)
{
    Group g;
    auto table = g.add_table_with_primary_key("class_StringClass", type_String, "name", true);
    table->create_object_with_primary_key("Foo");

    // Resolve the primary-key column once and reuse it for both queries.
    const auto name_col = table->get_column_key("name");

    // Present value.
    auto q = table->where().equal(name_col, "Foo", true);
    CHECK_EQUAL(q.count(), 1);

    // Absent value.
    q = table->where().equal(name_col, "Bar", true);
    CHECK_EQUAL(q.count(), 0);
}
2✔
1850

1851
// Adds a search index to a Mixed column that already contains a string value.
// There are no explicit checks; the test only requires that building the index
// over existing data completes.
TEST(StringIndex_MixedNonEmptyTable)
{
    Group g;
    auto table = g.add_table("foo");
    auto col = table->add_column(type_Mixed, "any");

    // Populate first ...
    auto obj = table->create_object();
    obj.set(col, Mixed("abcdefgh"));

    // ... then index the non-empty column.
    table->add_search_index(col);
}
2✔
1859

1860
// An indexed Mixed column must stay consistent when a value is replaced by a
// nested collection and later restored to a string: the query for 'banana'
// matches, stops matching, then matches again.
TEST(StringIndex_MixedWithNestedCollections)
{
    Group g;
    auto table = g.add_table("foo");
    auto col = table->add_column(type_Mixed, "value");
    table->add_search_index(col);

    // One object that never matches the query ...
    auto apple_obj = table->create_object();
    apple_obj.set(col, Mixed("apple"));

    // ... and one whose value is toggled below.
    auto obj = table->create_object();
    obj.set(col, Mixed("banana"));

    // The same query object is re-evaluated after each mutation.
    auto q = table->query("value = 'banana'");
    CHECK_EQUAL(q.count(), 1);

    // String replaced by a dictionary.
    obj.set_collection(col, CollectionType::Dictionary);
    CHECK_EQUAL(q.count(), 0);

    // Dictionary replaced by the original string again.
    obj.set(col, Mixed("banana"));
    CHECK_EQUAL(q.count(), 1);
}
2✔
1878

1879
// Stores the string "abcdefgh" and the integer 0x6867666564636261 in an
// indexed Mixed column. The integer's bytes, read from least to most
// significant, are the ASCII codes of 'a'..'h', so the two values have an
// equal bit pattern; queries must nevertheless keep them apart.
TEST(StringIndex_MixedEqualBitPattern)
{
    Group g;
    auto table = g.add_table("foo");
    auto col = table->add_column(type_Mixed, "any");
    table->add_search_index(col);

    const Mixed str_val("abcdefgh");
    Mixed val1(int64_t(0x6867666564636261));

    // Creates a new object holding the given value in the indexed column.
    auto add_value = [&](const Mixed& value) {
        table->create_object().set(col, value);
    };

    // One string followed by the integer: the index entry goes from a single
    // value to a list.
    add_value(str_val);
    add_value(val1);

    auto tv = table->where().equal(col, val1).find_all();
    CHECK_EQUAL(tv.size(), 1);
    CHECK_EQUAL(tv.get_object(0).get_any(col), val1);

    // Start over with two strings, so the integer is inserted into an
    // already existing list.
    table->clear();
    add_value(str_val);
    add_value(str_val);
    add_value(val1);

    tv = table->where().equal(col, val1).find_all();
    CHECK_EQUAL(tv.size(), 1);
    CHECK_EQUAL(tv.get_object(0).get_any(col), val1);
    tv = table->where().equal(col, str_val).find_all();
    CHECK_EQUAL(tv.size(), 2);

    // Add another one into existing list
    add_value(val1);
    tv = table->where().equal(col, val1).find_all();
    CHECK_EQUAL(tv.size(), 2);
    CHECK_EQUAL(tv.get_object(0).get_any(col), val1);
    CHECK_EQUAL(tv.get_object(1).get_any(col), val1);
}
2✔
1914

1915
// Exercises case_map() on Latin-1 supplement characters, on text containing
// multi-byte emoji, on a string truncated in the middle of a multi-byte
// sequence, and on a Danish phrase.
//
// case_map() can yield an empty result (see the truncated-input check below),
// so each round-trip step that dereferences a previous result runs only inside
// the guard that verified that result. A failed conversion is then reported as
// a test failure instead of dereferencing an empty value.
TEST(Unicode_Casemap)
{
    // Upper -> lower -> upper round trip. '±', 'ß', '×' and '÷' are expected
    // to be left unchanged in both directions.
    std::string inp = "±ÀÁÂÃÄÅÆÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝß×÷";
    auto out = case_map(inp, false);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "±àáâãäåæèéêëìíîïñòóôõöøùúûüýß×÷");

        out = case_map(*out, true);
        if (CHECK(out)) {
            CHECK_EQUAL(*out, inp);
        }
    }

    // Emoji must pass through untouched while the ASCII letters are
    // upper-cased.
    inp = "A very old house 🏠 is on 🔥, we have to save the 🦄";
    out = case_map(inp, true);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "A VERY OLD HOUSE 🏠 IS ON 🔥, WE HAVE TO SAVE THE 🦄");
    }

    // Invalid input: the conversion must fail rather than produce output.
    StringData trailing_garbage(inp.data(), 19); // String terminated inside icon
    out = case_map(trailing_garbage, true);
    CHECK_NOT(out);

    // Lower -> upper -> lower round trip.
    inp = "rødgrød med fløde";
    out = case_map(inp, true);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "RØDGRØD MED FLØDE");

        out = case_map(*out, false);
        if (CHECK(out)) {
            CHECK_EQUAL(*out, inp);
        }
    }
}
2✔
1947

1948
/// Returns a string of exactly `length` characters, each drawn uniformly from
/// the 62-character alphabet [0-9a-zA-Z].
///
/// The generator and the distribution are thread_local statics; the generator
/// is seeded from std::random_device on first use in each thread, so the
/// output differs from run to run.
static std::string random_string(std::string::size_type length)
{
    static const char alphabet[] = "0123456789"
                                   "abcdefghijklmnopqrstuvwxyz"
                                   "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    // sizeof counts the terminating NUL, hence "- 2" for the last valid index.
    constexpr std::string::size_type last_index = sizeof(alphabet) - 2;

    thread_local static std::mt19937 engine{std::random_device{}()};
    thread_local static std::uniform_int_distribution<std::string::size_type> index_dist(0, last_index);

    std::string result;
    result.reserve(length);
    for (std::string::size_type i = 0; i < length; ++i) {
        result.push_back(alphabet[index_dist(engine)]);
    }
    return result;
}
60,000✔
1966

1967
// Fills a list-of-strings column with random strings (10000 objects, three
// 14-character strings each), remembers one of them, and verifies that a
// query for it yields exactly one object both before and after a search index
// is added to the column.
TEST(StringIndex_ListOfRandomStrings)
{
    using namespace std::chrono; // only used by the commented-out timing code below

    constexpr size_t num_objects = 10000;
    constexpr size_t strings_per_object = 3;
    constexpr size_t string_length = 14;

    SHARED_GROUP_TEST_PATH(path);
    auto db = DB::create(path);
    auto wt = db->start_write();

    auto t = wt->add_table_with_primary_key("foo", type_Int, "_id");
    ColKey col_codes = t->add_column_list(type_String, "codes");

    // The first string of object 5000 is the value that gets queried for.
    std::string some_string;
    for (size_t i = 0; i < num_objects; i++) {
        auto obj = t->create_object_with_primary_key(int64_t(i));
        auto list = obj.get_list<String>(col_codes);
        for (size_t j = 0; j < strings_per_object; j++) {
            std::string str(random_string(string_length));
            const bool is_probe = (i == 5000 && j == 0);
            if (is_probe) {
                some_string = str;
            }
            list.add(StringData(str));
        }
    }

    // The query is built once, before the index exists, and is run twice.
    std::vector<Mixed> arguments{Mixed(some_string)};
    auto q = wt->get_table("foo")->query("codes = $0", arguments);

    // Without index.
    // auto t1 = steady_clock::now();
    auto tv = q.find_all();
    // auto t2 = steady_clock::now();
    // std::cout << "time without index: " << duration_cast<microseconds>(t2 - t1).count() << " us" << std::endl;
    CHECK_EQUAL(tv.size(), 1);
    t->add_search_index(col_codes);

    // With index.
    // t1 = steady_clock::now();
    tv = q.find_all();
    // t2 = steady_clock::now();
    // std::cout << "time with index: " << duration_cast<microseconds>(t2 - t1).count() << " us" << std::endl;
    CHECK_EQUAL(tv.size(), 1);
    // NOTE(review): the column is already indexed at this point, so this
    // second call looks redundant - confirm whether it is intentional.
    t->add_search_index(col_codes);

    // std::cout << tv.get_object(0).get<Int>("_id") << std::endl;
}
2✔
2009

2010
// Queries on a list-of-strings column while the lists are mutated with set,
// remove, clear and insert, and while an owning object is removed. The test
// is instantiated twice: with a search index on the column (std::true_type)
// and without one (std::false_type); the expected counts are the same in both
// variants.
TEST_TYPES(StringIndex_ListOfStrings, std::true_type, std::false_type)
{
    constexpr bool add_index = TEST_TYPE::value;
    Group g;

    auto t = g.add_table("foo");
    ColKey col = t->add_column_list(type_String, "names", true);
    if constexpr (add_index) {
        t->add_search_index(col);
    }

    auto obj1 = t->create_object();
    auto obj2 = t->create_object();
    auto obj3 = t->create_object();

    // obj2 and obj3 both hold {"Johnny", "John"}.
    auto add_johns = [&](Obj& target) {
        auto names = target.get_list<String>(col);
        names.add("Johnny");
        names.add("John");
    };
    add_johns(obj2);
    add_johns(obj3);

    // obj1 holds {"Johnny", "John", "Ivan", "Ivan", null}.
    auto list = obj1.get_list<String>(col);
    list.add("Johnny");
    list.add("John");
    list.add("Ivan");
    list.add("Ivan");
    list.add(StringData());

    CHECK_EQUAL(t->query(R"(names = "John")").count(), 3);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 3);
    CHECK_EQUAL(t->query(R"(names = NULL)").count(), 1);

    // Overwrite obj1's "Johnny" with "Paul".
    list.set(0, "Paul");
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 3);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Paul")").count(), 1);

    // Drop obj1's "John". "Ivan" occurs twice in the list, yet the object is
    // counted once.
    list.remove(1);
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Paul")").count(), 1);
    CHECK_EQUAL(t->query(R"(names = "Ivan")").count(), 1);

    // Empty obj1's list entirely.
    list.clear();
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Paul")").count(), 0);

    // Grow obj2's list, then remove the whole object; only obj3 still holds
    // the two names.
    list = obj2.get_list<String>(col);
    list.insert(0, "Adam");
    list.insert(0, "Adam");
    obj2.remove();
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 1);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 1);

    // A string of exactly StringIndex::s_max_offset characters can be added;
    // one character more is expected to throw, but only when the column is
    // indexed.
    std::string long1(StringIndex::s_max_offset, 'a');
    std::string long2 = long1;
    long2 += "b";

    list = obj1.get_list<String>(col);
    list.add(long1);
    if constexpr (add_index) {
        CHECK_THROW_ANY(list.add(long2));
    }
}
4✔
2074

2075
#endif // TEST_INDEX_STRING
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc