• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

realm / realm-core / jorgen.edelbo_402

21 Aug 2024 11:10AM UTC coverage: 91.054% (-0.03%) from 91.085%
jorgen.edelbo_402

Pull #7803

Evergreen

jedelbo
Small fix to Table::typed_write

When writing the realm to a new file from a write transaction,
the Table may be COW so that the top ref is changed. So don't
use the ref that is present in the group when the operation starts.
Pull Request #7803: Feature/string compression

103494 of 181580 branches covered (57.0%)

1929 of 1999 new or added lines in 46 files covered. (96.5%)

695 existing lines in 51 files now uncovered.

220142 of 241772 relevant lines covered (91.05%)

7344461.76 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.4
/test/test_index_string.cpp
1
/*************************************************************************
2
 *
3
 * Copyright 2016 Realm Inc.
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 **************************************************************************/
18

19
#include "testsettings.hpp"
20
#ifdef TEST_INDEX_STRING
21

22
#include <realm.hpp>
23
#include <realm/index_string.hpp>
24
#include <realm/query_expression.hpp>
25
#include <realm/tokenizer.hpp>
26
#include <realm/util/to_string.hpp>
27
#include <set>
28
#include "test.hpp"
29
#include "util/misc.hpp"
30
#include "util/random.hpp"
31

32
using namespace realm;
33
using namespace util;
34
using namespace realm;
35
using namespace realm::util;
36
using namespace realm::test_util;
37
using unit_test::TestContext;
38

39
// Test independence and thread-safety
40
// -----------------------------------
41
//
42
// All tests must be thread safe and independent of each other. This
43
// is required because it allows for both shuffling of the execution
44
// order and for parallelized testing.
45
//
46
// In particular, avoid using std::rand() since it is not guaranteed
47
// to be thread safe. Instead use the API offered in
48
// `test/util/random.hpp`.
49
//
50
// All files created in tests must use the TEST_PATH macro (or one of
51
// its friends) to obtain a suitable file system path. See
52
// `test/util/test_path.hpp`.
53
//
54
//
55
// Debugging and the ONLY() macro
56
// ------------------------------
57
//
58
// A simple way of disabling all tests except one called `Foo`, is to
59
// replace TEST(Foo) with ONLY(Foo) and then recompile and rerun the
60
// test suite. Note that you can also use filtering by setting the
61
// environment variable `UNITTEST_FILTER`. See `README.md` for more on
62
// this.
63
//
64
// Another way to debug a particular test, is to copy that test into
65
// `experiments/testcase.cpp` and then run `sh build.sh
66
// check-testcase` (or one of its friends) from the command line.
67

68

69
namespace {
70

71
template <typename T>
72
class column {
73
public:
74
    class ColumnTestType {
75
    public:
76
        ColumnTestType(column* owner)
77
            : m_owner(owner)
47✔
78
        {
94✔
79
        }
94✔
80
        const SearchIndex* create_search_index()
81
        {
96✔
82
            m_owner->m_table.add_search_index(m_owner->m_col_key);
96✔
83
            return m_owner->m_table.get_search_index(m_owner->m_col_key);
96✔
84
        }
96✔
85
        ObjKey key(size_t ndx) const
86
        {
94✔
87
            return m_keys[ndx];
94✔
88
        }
94✔
89
        size_t size() const
90
        {
150✔
91
            return m_keys.size();
150✔
92
        }
150✔
93
        void add(T value)
94
        {
41,058✔
95
            auto k = m_owner->m_table.create_object().set(m_owner->m_col_key, value).get_key();
41,058✔
96
            m_keys.push_back(k);
41,058✔
97
        }
41,058✔
98
        void add_null()
99
        {
2✔
100
            auto k = m_owner->m_table.create_object().set_null(m_owner->m_col_key).get_key();
2✔
101
            m_keys.push_back(k);
2✔
102
        }
2✔
103
        void set(size_t ndx, T value)
104
        {
24✔
105
            m_owner->m_table.get_object(m_keys[ndx]).set(m_owner->m_col_key, value);
24✔
106
        }
24✔
107
        void insert(size_t ndx, T value)
108
        {
6✔
109
            auto k = m_owner->m_table.create_object().set(m_owner->m_col_key, value).get_key();
6✔
110
            m_keys.insert(m_keys.begin() + ndx, k);
6✔
111
        }
6✔
112
        T get(size_t ndx)
113
        {
2,188,002✔
114
            return m_owner->m_table.get_object(m_keys[ndx]).template get<T>(m_owner->m_col_key);
2,188,002✔
115
        }
2,188,002✔
116
        T get(ObjKey obj_key)
117
        {
76✔
118
            return m_owner->m_table.get_object(obj_key).template get<T>(m_owner->m_col_key);
76✔
119
        }
76✔
120
        void erase(size_t ndx)
121
        {
94✔
122
            m_owner->m_table.remove_object(m_keys[ndx]);
94✔
123
            m_keys.erase(m_keys.begin() + ndx);
94✔
124
        }
94✔
125
        void clear()
126
        {
82✔
127
            m_owner->m_table.clear();
82✔
128
            m_keys.clear();
82✔
129
        }
82✔
130
        size_t find_first(T value) const
131
        {
2,612✔
132
            auto k = m_owner->m_table.find_first(m_owner->m_col_key, value);
2,612✔
133
            if (k == realm::null_key) {
2,612✔
134
                return realm::npos;
1,265✔
135
            }
1,265✔
136
            auto it = std::find(m_keys.begin(), m_keys.end(), k);
1,347✔
137
            return it - m_keys.begin();
1,347✔
138
        }
2,612✔
139
        size_t count(T value) const
140
        {
80✔
141
            return m_owner->m_table.count_string(m_owner->m_col_key, value);
80✔
142
        }
80✔
143
        void verify()
144
        {
12✔
145
            m_owner->m_table.verify();
12✔
146
        }
12✔
147

148
    private:
149
        column* m_owner;
150
        std::vector<ObjKey> m_keys;
151
    };
152

153
    column(bool nullable = false)
154
        : m_column(this)
47✔
155
    {
94✔
156
        m_col_key = m_table.add_column(ColumnTypeTraits<T>::id, "values", nullable);
94✔
157
    }
94✔
158
    ColumnTestType& get_column()
159
    {
94✔
160
        return m_column;
94✔
161
    }
94✔
162

163
private:
164
    Table m_table;
165
    ColKey m_col_key;
166
    ColumnTestType m_column;
167
};
168

169
/// Fixture for the TEST_TYPES tests: a String column that does not accept null.
class string_column : public column<String> {
public:
    string_column()
        : column<String>(false)
    {
    }

    /// Reports the nullability this fixture was created with.
    static bool is_nullable()
    {
        return false;
    }
};
180
/// Fixture for the TEST_TYPES tests: a String column that accepts null.
class nullable_string_column : public column<String> {
public:
    nullable_string_column()
        : column<String>(true)
    {
    }

    /// Reports the nullability this fixture was created with.
    static bool is_nullable()
    {
        return true;
    }
};
191

192
// disable to avoid warnings about not being used - enable when tests
193
// needing them are enabled again
194

195
// strings used by tests
196
// s5 and s6 both start with s1, and s3 starts with s7, so the set contains
// values that share a common prefix as well as unrelated ones.
const char s1[] = "John"; // prefix of s5 and s6
197
const char s2[] = "Brian";
198
const char s3[] = "Samantha";
199
const char s4[] = "Tom";
200
const char s5[] = "Johnathan"; // starts with s1
201
const char s6[] = "Johnny"; // starts with s1
202
const char s7[] = "Sam"; // prefix of s3
203

204
// integers used by integer index tests
205
// Ten values for the integer index tests. 0x11112227 appears twice so that
// the tests see both unique and duplicated values. Declared const because the
// data is shared by every test in this file and tests may run in parallel.
const std::vector<int64_t> ints = {0x1111,     0x11112222, 0x11113333, 0x1111333, 0x111122223333ull, 0x1111222233334ull,
                                   0x22223333, 0x11112227, 0x11112227, 0x78923};
207

208
// Compile-time tags naming the two nullability variants.
// NOTE(review): neither alias is referenced in the part of the file visible
// here — confirm they are still needed.
using nullable = std::true_type;
209
using non_nullable = std::false_type;
210

211
} // anonymous namespace
212

213
TEST(Tokenizer_Basic)
{
    auto tokenizer = realm::Tokenizer::get_instance();

    // Six words, but only four distinct tokens.
    tokenizer->reset("to be or not to be");
    auto distinct_tokens = tokenizer->get_all_tokens();
    CHECK_EQUAL(distinct_tokens.size(), 4);

    // The leading "To" is expected under the key "to", together with the
    // second occurrence later in the text.
    {
        tokenizer->reset("To be or not to be");
        realm::TokenInfoMap token_info = tokenizer->get_token_info();
        CHECK_EQUAL(token_info.size(), 4);

        realm::TokenInfo& to_info = token_info["to"];
        CHECK_EQUAL(to_info.positions.size(), 2);
        CHECK_EQUAL(to_info.positions[0], 0);
        CHECK_EQUAL(to_info.positions[1], 4);
        CHECK_EQUAL(to_info.ranges.size(), 2);
        CHECK_EQUAL(to_info.ranges[0].first, 0);
        CHECK_EQUAL(to_info.ranges[0].second, 2);
        CHECK_EQUAL(to_info.ranges[1].first, 13);
        CHECK_EQUAL(to_info.ranges[1].second, 15);
    }

    // Text with a non-ASCII letter. The expected range [17, 20) leaves three
    // units for "sø" (presumably UTF-8 bytes rather than characters).
    {
        tokenizer->reset("Jeg gik mig over sø og land");
        realm::TokenInfoMap token_info = tokenizer->get_token_info();
        CHECK_EQUAL(token_info.size(), 7);

        realm::TokenInfo& soe_info = token_info["sø"];
        CHECK_EQUAL(soe_info.ranges[0].first, 17);
        CHECK_EQUAL(soe_info.ranges[0].second, 20);
    }

    // Hyphens separate tokens and never become part of one.
    tokenizer->reset("with-hyphen -term -other-term-plus");
    const std::set<std::string> expected_tokens{"with", "hyphen", "term", "other", "plus"};
    CHECK(tokenizer->get_all_tokens() == expected_tokens);
}
2✔
244

245
TEST(StringIndex_NonIndexable)
{
    // Build a table whose columns all have types that are expected to reject
    // a search index: link, link list, double, float and binary.
    Group group;
    TableRef origin = group.add_table("table");
    TableRef target = group.add_table("target");

    origin->add_column(*target, "link");
    origin->add_column_list(*target, "linkList");
    origin->add_column(type_Double, "double");
    origin->add_column(type_Float, "float");
    origin->add_column(type_Binary, "binary");

    // Every attempt to index one of them must fail with IllegalOperation.
    for (auto col_key : origin->get_column_keys()) {
        CHECK_LOGIC_ERROR(origin->add_search_index(col_key), ErrorCodes::IllegalOperation);
    }
}
2✔
261

262
TEST_TYPES(StringIndex_BuildIndex, string_column, nullable_string_column)
{
    TEST_TYPE fixture;
    auto& col = fixture.get_column();

    // Rows 0..6: row 4 duplicates s1, rows 5 and 6 share a prefix with s1.
    const char* const rows[] = {s1, s2, s3, s4, s1, s5, s6};
    for (const char* str : rows)
        col.add(str);

    // Index the already populated column.
    const SearchIndex* index = col.create_search_index();

    // Each lookup must yield the first object holding the value; for the
    // duplicated s1 that is the object created first.
    CHECK_EQUAL(0, index->find_first(s1).value);
    CHECK_EQUAL(1, index->find_first(s2).value);
    CHECK_EQUAL(2, index->find_first(s3).value);
    CHECK_EQUAL(3, index->find_first(s4).value);
    CHECK_EQUAL(5, index->find_first(s5).value);
    CHECK_EQUAL(6, index->find_first(s6).value);
}
4✔
292

293
TEST_TYPES(StringIndex_DeleteAll, string_column, nullable_string_column)
{
    TEST_TYPE fixture;
    auto& col = fixture.get_column();

    // Includes a duplicate (s1) and two values sharing a prefix with it (s5, s6).
    const char* const rows[] = {s1, s2, s3, s4, s1, s5, s6};
    const size_t row_count = sizeof(rows) / sizeof(rows[0]);
    auto populate = [&] {
        for (const char* str : rows)
            col.add(str);
    };

    populate();

    // Index the already populated column.
    const SearchIndex* index = col.create_search_index();

    // First pass: erase from the back (reverse order to avoid ref updates).
    for (size_t remaining = row_count; remaining > 0; --remaining)
        col.erase(remaining - 1);
    CHECK(index->is_empty());

    // Second pass: refill, then always erase the front row
    // (in order to force constant ref updating).
    populate();
    for (size_t erased = 0; erased < row_count; ++erased)
        col.erase(0);
    CHECK(index->is_empty());
}
4✔
340

341
TEST_TYPES(StringIndex_Delete, string_column, nullable_string_column)
{
    TEST_TYPE fixture;
    auto& col = fixture.get_column();

    // Last row duplicates the first one.
    const char* const rows[] = {s1, s2, s3, s4, s1};
    for (const char* str : rows)
        col.add(str);

    const SearchIndex* index = col.create_search_index();

    // Remove s2 — the first item (in index).
    col.erase(1);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(1, col.find_first(s3));
    CHECK_EQUAL(2, col.find_first(s4));
    CHECK_EQUAL(null_key, index->find_first(s2));

    // Remove s4 — the last item (in index).
    col.erase(2);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(1, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s4));
    CHECK_EQUAL(not_found, col.find_first(s2));

    // Remove s3 — the middle item (in index).
    col.erase(1);

    CHECK_EQUAL(0, col.find_first(s1));
    CHECK_EQUAL(not_found, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s4));
    CHECK_EQUAL(not_found, col.find_first(s2));

    // Only the two s1 rows are left; removing both must empty the index.
    for (int pass = 0; pass < 2; ++pass)
        col.erase(0);
    CHECK(index->is_empty());
}
4✔
384

385

386
TEST_TYPES(StringIndex_ClearEmpty, string_column, nullable_string_column)
{
    TEST_TYPE fixture;
    auto& col = fixture.get_column();

    // Index a column that never received any value ...
    const SearchIndex* index = col.create_search_index();

    // ... and check that clearing it leaves the index empty.
    col.clear();
    CHECK(index->is_empty());
}
4✔
398

399
TEST_TYPES(StringIndex_Clear, string_column, nullable_string_column)
{
    TEST_TYPE fixture;
    auto& col = fixture.get_column();

    // Includes a duplicate (s1) and two values sharing a prefix with it (s5, s6).
    const char* const rows[] = {s1, s2, s3, s4, s1, s5, s6};
    auto populate = [&] {
        for (const char* str : rows)
            col.add(str);
    };

    populate();

    // Index the already populated column.
    const SearchIndex* index = col.create_search_index();

    // Clearing the column must leave nothing behind in the index.
    col.clear();
    CHECK(index->is_empty());

    // The index must keep working when the same values are added again.
    populate();

    CHECK_EQUAL(col.key(0), index->find_first(s1));
    CHECK_EQUAL(col.key(1), index->find_first(s2));
    CHECK_EQUAL(col.key(2), index->find_first(s3));
    CHECK_EQUAL(col.key(3), index->find_first(s4));
    CHECK_EQUAL(col.key(5), index->find_first(s5));
    CHECK_EQUAL(col.key(6), index->find_first(s6));
}
4✔
442

443

444
TEST_TYPES(StringIndex_Set, string_column, nullable_string_column)
{
    TEST_TYPE fixture;
    auto& col = fixture.get_column();

    // Last row duplicates the first one.
    const char* const rows[] = {s1, s2, s3, s4, s1};
    for (const char* str : rows)
        col.add(str);

    col.create_search_index();

    // Overwrite the top row: s1 -> s5. The remaining s1 is now found at row 4.
    col.set(0, s5);

    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s3));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s1));

    // Overwrite the bottom row: s1 -> s6. No s1 is left afterwards.
    col.set(4, s6);

    CHECK_EQUAL(not_found, col.find_first(s1));
    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s3));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s6));

    // Overwrite the middle row: s3 -> s7.
    col.set(2, s7);

    CHECK_EQUAL(not_found, col.find_first(s3));
    CHECK_EQUAL(not_found, col.find_first(s1));
    CHECK_EQUAL(0, col.find_first(s5));
    CHECK_EQUAL(1, col.find_first(s2));
    CHECK_EQUAL(2, col.find_first(s7));
    CHECK_EQUAL(3, col.find_first(s4));
    CHECK_EQUAL(4, col.find_first(s6));
}
4✔
488

489
TEST_TYPES(StringIndex_Count, string_column, nullable_string_column)
{
    TEST_TYPE fixture;
    auto& col = fixture.get_column();

    // s1 once, s2 twice, s3 three times, s4 four times; s5 is never added.
    const char* const rows[] = {s1, s2, s2, s3, s3, s3, s4, s4, s4, s4};
    for (const char* str : rows)
        col.add(str);

    // Index the already populated column.
    col.create_search_index();

    // Each value must be counted exactly as often as it was added.
    CHECK_EQUAL(0, col.count(s5));
    CHECK_EQUAL(1, col.count(s1));
    CHECK_EQUAL(2, col.count(s2));
    CHECK_EQUAL(3, col.count(s3));
    CHECK_EQUAL(4, col.count(s4));
}
4✔
520

521
TEST_TYPES(StringIndex_Distinct, string_column, nullable_string_column)
{
    TEST_TYPE fixture;
    auto& col = fixture.get_column();

    // s2, s3 and s4 are each added more than once.
    const char* const rows[] = {s1, s2, s2, s3, s3, s3, s4, s4, s4, s4};
    for (const char* str : rows)
        col.add(str);

    // The freshly built index must notice the repeated values.
    const SearchIndex& index = *col.create_search_index();
    CHECK(index.has_duplicate_values());
}
4✔
541

542
TEST_TYPES(StringIndex_FindAllNoCopy, string_column, nullable_string_column)
{
    TEST_TYPE fixture;
    auto& col = fixture.get_column();

    // s1 once (row 0), s2 twice, s3 three times, s4 four times (rows 6..9).
    const char* const rows[] = {s1, s2, s2, s3, s3, s3, s4, s4, s4, s4};
    for (const char* str : rows)
        col.add(str);

    // Index the already populated column.
    const SearchIndex* index = col.create_search_index();

    InternalFindResult found;

    // A value that was never added.
    const FindRes missing = index->find_all_no_copy(StringData("not there"), found);
    CHECK_EQUAL(FindRes_not_found, missing);

    // A value stored once: the payload is compared against 0, the key value
    // of the first object created.
    const FindRes single = index->find_all_no_copy(s1, found);
    CHECK_EQUAL(FindRes_single, single);
    CHECK_EQUAL(0, found.payload);

    // A value stored several times: the payload is a ref to the list of keys.
    const FindRes multiple = index->find_all_no_copy(s4, found);
    CHECK_EQUAL(FindRes_column, multiple);
    BPlusTree<ObjKey> matches(Allocator::get_default());
    matches.init_from_ref(ref_type(found.payload));

    CHECK_EQUAL(4, found.end_ndx - found.start_ndx);
    CHECK_EQUAL(4, matches.size());
    for (size_t n = 0; n < 4; ++n)
        CHECK_EQUAL(col.key(6 + n), matches.get(n));
}
4✔
581

582
// If a column contains a specific value in multiple rows, then the index will store a list of these row numbers
583
// in form of a column. If you call find_all() on an index, it will return a *reference* to that column instead
584
// of copying it to you, as a performance optimization.
585
TEST(StringIndex_FindAllNoCopy2_Int)
{
    // Create a column with duplicate values
    column<Int> test_resources;
    // Bind by reference: plain `auto` would copy the facade returned by
    // get_column() instead of using the fixture's own instance.
    auto& col = test_resources.get_column();

    for (auto i : ints)
        col.add(i);

    // Create a new index on column (once; the call used to be repeated)
    const SearchIndex& ndx = *col.create_search_index();
    InternalFindResult results;

    for (auto i : ints) {
        FindRes res = ndx.find_all_no_copy(i, results);

        // Count how often the value occurs in the input data.
        size_t real = 0;
        for (auto j : ints) {
            if (i == j)
                real++;
        }

        if (real == 1) {
            // Unique value: the payload is used as a position into `ints`.
            CHECK_EQUAL(res, FindRes_single);
            CHECK_EQUAL(i, ints[size_t(results.payload)]);
        }
        else if (real > 1) {
            // Duplicated value: the payload is a ref to a column of matches,
            // each of which is again used as a position into `ints`.
            CHECK_EQUAL(FindRes_column, res);
            const IntegerColumn results_column(Allocator::get_default(), ref_type(results.payload));
            CHECK_EQUAL(real, results.end_ndx - results.start_ndx);
            CHECK_EQUAL(real, results_column.size());
            for (size_t y = 0; y < real; y++)
                CHECK_EQUAL(i, ints[size_t(results_column.get(y))]);
        }
    }
}
2✔
622

623
// If a column contains a specific value in multiple rows, then the index will store a list of these row numbers
624
// in form of a column. If you call find_all() on an index, it will return a *reference* to that column instead
625
// of copying it to you, as a performance optimization.
626
TEST(StringIndex_FindAllNoCopy2_IntNull)
{
    // Create a nullable column with duplicate values, followed by one null
    column<Int> test_resources(true);
    // Bind by reference: plain `auto` would copy the facade returned by
    // get_column() instead of using the fixture's own instance.
    auto& col = test_resources.get_column();

    // `ints` is a std::vector, so iterate it directly. The former
    // `sizeof(ints) / sizeof(ints[0])` bound measured the vector object
    // itself rather than its elements and silently skipped most values,
    // including the duplicated one.
    for (auto i : ints)
        col.add(i);
    col.add_null();

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();
    InternalFindResult results;

    for (auto i : ints) {
        FindRes res = ndx.find_all_no_copy(i, results);

        // Count how often the value occurs in the input data.
        size_t real = 0;
        for (auto j : ints) {
            if (i == j)
                real++;
        }

        if (real == 1) {
            // Unique value: the payload is used as a position into `ints`.
            CHECK_EQUAL(res, FindRes_single);
            CHECK_EQUAL(i, ints[size_t(results.payload)]);
        }
        else if (real > 1) {
            // Duplicated value: the payload is a ref to a column of matches,
            // each of which is again used as a position into `ints`.
            CHECK_EQUAL(FindRes_column, res);
            const IntegerColumn results2(Allocator::get_default(), ref_type(results.payload));
            CHECK_EQUAL(real, results.end_ndx - results.start_ndx);
            CHECK_EQUAL(real, results2.size());
            for (size_t y = 0; y < real; y++)
                CHECK_EQUAL(i, ints[size_t(results2.get(y))]);
        }
    }

    // The single null was added last, so it must be found at the last position.
    FindRes res = ndx.find_all_no_copy(null{}, results);
    CHECK_EQUAL(FindRes_single, res);
    CHECK_EQUAL(results.payload, col.size() - 1);
}
2✔
667

668
TEST_TYPES(StringIndex_FindAllNoCopyCommonPrefixStrings, string_column, nullable_string_column)
{
    TEST_TYPE fixture;
    auto& col = fixture.get_column();
    const SearchIndex* index = col.create_search_index();

    // Adds prefix+"b" once, prefix+"c" twice and prefix+"e" three times
    // (prefix+"d" is never added) and checks what the index reports for each.
    auto check_prefix = [&](std::string prefix) {
        std::string text_b = prefix + "b";
        std::string text_c = prefix + "c";
        std::string text_d = prefix + "d";
        std::string text_e = prefix + "e";
        StringData once(text_b);
        StringData twice(text_c);
        StringData never(text_d);
        StringData thrice(text_e);

        // Rows added by earlier calls stay in the column, so all expected
        // positions are relative to the current size.
        const size_t first_row = col.size();
        col.add(once);
        for (int n = 0; n < 2; ++n)
            col.add(twice);
        for (int n = 0; n < 3; ++n)
            col.add(thrice);

        InternalFindResult found;

        // Stored once: the payload identifies the row directly.
        FindRes outcome = index->find_all_no_copy(once, found);
        CHECK_EQUAL(outcome, FindRes_single);
        CHECK_EQUAL(found.payload, first_row);

        // Stored twice: the payload is a ref to a column of matches.
        outcome = index->find_all_no_copy(twice, found);
        CHECK_EQUAL(outcome, FindRes_column);
        CHECK_EQUAL(found.end_ndx - found.start_ndx, 2);
        const IntegerColumn rows_c(Allocator::get_default(), ref_type(found.payload));
        for (size_t k = 0; k < 2; ++k)
            CHECK_EQUAL(rows_c.get(found.start_ndx + k), first_row + 1 + k);
        for (size_t k = 0; k < 2; ++k)
            CHECK_EQUAL(col.get(size_t(rows_c.get(found.start_ndx + k))), twice);

        // Never stored.
        outcome = index->find_all_no_copy(never, found);
        CHECK_EQUAL(outcome, FindRes_not_found);

        // Stored three times.
        outcome = index->find_all_no_copy(thrice, found);
        CHECK_EQUAL(outcome, FindRes_column);
        CHECK_EQUAL(found.end_ndx - found.start_ndx, 3);
        const IntegerColumn rows_e(Allocator::get_default(), ref_type(found.payload));
        for (size_t k = 0; k < 3; ++k)
            CHECK_EQUAL(rows_e.get(found.start_ndx + k), first_row + 3 + k);
        for (size_t k = 0; k < 3; ++k)
            CHECK_EQUAL(col.get(size_t(rows_e.get(found.start_ndx + k))), thrice);
    };

    // Prefix lengths at, just above and well below StringIndex::s_max_offset.
    const std::string at_max(StringIndex::s_max_offset, 'a');
    const std::string above_max = at_max + "a";
    const std::string below_max(StringIndex::s_max_offset >> 1, 'a');

    check_prefix(at_max);
    check_prefix(above_max);
    check_prefix(below_max);
}
4✔
729

730
TEST(StringIndex_Count_Int)
{
    // Fill an integer column from `ints`, which contains a repeated value.
    column<Int> fixture;
    auto col = fixture.get_column(); // note: `auto` takes a copy of the facade

    for (auto value : ints)
        col.add(value);

    // Index the already populated column.
    const SearchIndex* index = col.create_search_index();

    for (auto value : ints) {
        const size_t reported = index->count(value);

        // Reference count obtained by scanning the input data.
        size_t expected = 0;
        for (auto other : ints)
            expected += (value == other) ? 1 : 0;

        CHECK_EQUAL(expected, reported);
    }
}
2✔
754

755

756
TEST(StringIndex_Distinct_Int)
{
    // Fill an integer column from `ints`, which contains a repeated value.
    column<Int> fixture;
    auto col = fixture.get_column(); // note: `auto` takes a copy of the facade

    for (auto value : ints)
        col.add(value);

    // The freshly built index must notice the repeated value.
    const SearchIndex* index = col.create_search_index();
    CHECK(index->has_duplicate_values());
}
2✔
769

770

771
TEST(StringIndex_Set_Add_Erase_Insert_Int)
{
    column<Int> fixture;
    auto col = fixture.get_column(); // note: `auto` takes a copy of the facade

    // Positions 0..3 hold 1, 2, 3, 2.
    const int64_t initial[] = {1, 2, 3, 2};
    for (int64_t value : initial)
        col.add(value);

    // Index the already populated column.
    const SearchIndex* index = col.create_search_index();

    // The first 2 sits at position 1.
    CHECK_EQUAL(col.key(1), index->find_first(int64_t(2)));

    // Overwriting it leaves only the 2 at position 3.
    col.set(1, 5);
    CHECK_EQUAL(col.key(3), index->find_first(int64_t(2)));

    // Erasing position 1 shifts that 2 down to position 2.
    col.erase(1);
    CHECK_EQUAL(col.key(2), index->find_first(int64_t(2)));

    // Inserting at position 1 pushes it back to position 3.
    col.insert(1, 5);
    CHECK_EQUAL(col.get(1), 5);
    CHECK_EQUAL(col.key(3), index->find_first(int64_t(2)));

    // Append a value, change it, and look the new value up.
    col.add(7);
    CHECK_EQUAL(col.get(4), 7);
    col.set(4, 10);
    CHECK_EQUAL(col.get(4), 10);
    CHECK_EQUAL(col.key(col.size() - 1), index->find_first(int64_t(10)));

    // A freshly appended value is found at the last position.
    col.add(9);
    CHECK_EQUAL(col.key(col.size() - 1), index->find_first(int64_t(9)));

    // After clearing, nothing can be found any more.
    col.clear();
    CHECK_EQUAL(null_key, index->find_first(int64_t(2)));
}
2✔
819

820
TEST(StringIndex_FuzzyTest_Int)
{
    column<Int> fixture;
    auto col = fixture.get_column(); // note: `auto` takes a copy of the facade
    Random random(random_int<unsigned long>());
    // Presumably chosen so that the data does not fit into a single B+-tree node.
    const size_t n = static_cast<size_t>(1.2 * REALM_MAX_BPNODE_SIZE);

    // Here the index exists before any value is added.
    col.create_search_index();

    for (size_t added = 0; added < n; ++added) {
        col.add(random.draw_int_max(0xffffffffffffffff));
    }

    for (size_t round = 0; round < n; ++round) {
        // Look up either a value known to be present or a fresh random one.
        const int64_t needle = random.draw_bool()
                                   ? col.get(round)
                                   : static_cast<int64_t>(random.draw_int_max(0xffffffffffffffff));

        const size_t reported = col.find_first(needle);

        // Linear scan as reference: the first matching position must be the
        // one find_first reported. Nothing is checked if there is no match.
        for (size_t row = 0; row < n; ++row) {
            if (col.get(row) != needle)
                continue;
            CHECK_EQUAL(row, reported);
            break;
        }
    }
}
2✔
849

850
namespace {
851

852
// Generate string where the bit pattern in bits is converted to NUL bytes. E.g. (length=2):
853
// bits=0 -> "\0\0", bits=1 -> "\x\0", bits=2 -> "\0\x", bits=3 -> "\x\x", where x is a random byte
854
// Fills tmp[0..length) and returns a StringData referring to it. Position p
// becomes a NUL byte when bit p of `bits` is clear; otherwise it receives a
// random non-zero byte drawn from `random`. The caller owns `tmp`, which must
// have room for at least `length` bytes.
StringData create_string_with_nuls(const size_t bits, const size_t length, char* tmp, Random& random)
{
    for (size_t pos = 0; pos < length; ++pos) {
        const bool bit_is_set = ((bits >> pos) & size_t(1)) != 0;
        // Random bytes start at 1 so that NULs appear only where `bits` asks
        // for them. The double cast preserves the bit pattern of the byte.
        tmp[pos] = bit_is_set
                       ? static_cast<char>(static_cast<unsigned char>(random.draw_int<unsigned int>(1, UCHAR_MAX)))
                       : '\0';
    }
    return StringData(tmp, length);
}
×
869

870
} // anonymous namespace
871

872

873
// Test for generated strings of length 1..16 with all combinations of embedded NUL bytes
874
TEST_TYPES_IF(StringIndex_EmbeddedZeroesCombinations, TEST_DURATION > 1, string_column, nullable_string_column)
875
{
×
876
    TEST_TYPE test_resources;
×
877
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
×
878
    const SearchIndex& ndx = *col.create_search_index();
×
879

880
    constexpr unsigned int seed = 42;
×
881
    const size_t MAX_LENGTH = 16; // Test medium
×
882
    char tmp[MAX_LENGTH];         // this is a bit of a hack, that relies on the string being copied in column.add()
×
883

884
    for (size_t length = 1; length <= MAX_LENGTH; ++length) {
×
885

886
        {
×
887
            Random random(seed);
×
888
            const size_t combinations = size_t(1) << length;
×
889
            for (size_t i = 0; i < combinations; ++i) {
×
890
                StringData str = create_string_with_nuls(i, length, tmp, random);
×
891
                col.add(str);
×
892
            }
×
893
        }
×
894

895
        // check index up to this length
896
        size_t expected_index = 0;
×
897
        for (size_t l = 1; l <= length; ++l) {
×
898
            Random random(seed);
×
899
            const size_t combinations = size_t(1) << l;
×
900
            for (size_t i = 0; i < combinations; ++i) {
×
901
                StringData needle = create_string_with_nuls(i, l, tmp, random);
×
902
                CHECK_EQUAL(ndx.find_first(needle), col.key(expected_index));
×
903
                CHECK(strncmp(col.get(expected_index).data(), needle.data(), l) == 0);
×
904
                CHECK_EQUAL(col.get(expected_index).size(), needle.size());
×
905
                expected_index++;
×
906
            }
×
907
        }
×
908
    }
×
909
}
×
910

911
// Tests for a bug with strings containing zeroes
912
TEST_TYPES(StringIndex_EmbeddedZeroes, string_column, nullable_string_column)
913
{
4✔
914
    TEST_TYPE test_resources;
4✔
915
    typename TEST_TYPE::ColumnTestType& col2 = test_resources.get_column();
4✔
916
    const SearchIndex& ndx2 = *col2.create_search_index();
4✔
917

918
    // FIXME: re-enable once embedded nuls work
919
    col2.add(StringData("\0", 1));
4✔
920
    col2.add(StringData("\1", 1));
4✔
921
    col2.add(StringData("\0\0", 2));
4✔
922
    col2.add(StringData("\0\1", 2));
4✔
923
    col2.add(StringData("\1\0", 2));
4✔
924

925
    CHECK_EQUAL(ndx2.find_first(StringData("\0", 1)), col2.key(0));
4✔
926
    CHECK_EQUAL(ndx2.find_first(StringData("\1", 1)), col2.key(1));
4✔
927
    CHECK_EQUAL(ndx2.find_first(StringData("\2", 1)), null_key);
4✔
928
    CHECK_EQUAL(ndx2.find_first(StringData("\0\0", 2)), col2.key(2));
4✔
929
    CHECK_EQUAL(ndx2.find_first(StringData("\0\1", 2)), col2.key(3));
4✔
930
    CHECK_EQUAL(ndx2.find_first(StringData("\1\0", 2)), col2.key(4));
4✔
931
    CHECK_EQUAL(ndx2.find_first(StringData("\1\0\0", 3)), null_key);
4✔
932

933
    // Integer index (uses String index internally)
934
    int64_t v = 1ULL << 41;
4✔
935
    column<Int> test_resources_1;
4✔
936
    auto col = test_resources_1.get_column();
4✔
937
    const SearchIndex& ndx = *col.create_search_index();
4✔
938
    col.add(1ULL << 40);
4✔
939
    auto f = ndx.find_first(v);
4✔
940
    CHECK_EQUAL(f, null_key);
4✔
941
}
4✔
942

943
// The index must tell null apart from the empty string: looking up null has
// to return the row that holds null, not the row that holds "".
TEST(StringIndex_Null)
{
    nullable_string_column test_resources;
    auto& col = test_resources.get_column();

    col.add("");            // row 0: empty string
    col.add(realm::null()); // row 1: null

    // The index is created after the rows exist.
    const SearchIndex& ndx = *col.create_search_index();

    auto null_row = ndx.find_first(realm::null());
    CHECK_EQUAL(null_row, col.key(1));
}
2✔
956

957

958
// StringIndex could crash if strings ended with one or more 0-bytes.
// Stores "", "\0" and "\0\0", indexes the column and looks each value up again.
TEST_TYPES(StringIndex_Zero_Crash, string_column, nullable_string_column)
{
    bool nullable = TEST_TYPE::is_nullable();

    Table table;
    auto col = table.add_column(type_String, "strings", nullable);

    const StringData values[] = {StringData(""), StringData("\0", 1), StringData("\0\0", 2)};
    const size_t num_values = sizeof(values) / sizeof(values[0]);

    // One object per value, created before the index is added.
    ObjKey keys[num_values];
    for (size_t i = 0; i < num_values; ++i) {
        keys[i] = table.create_object().set(col, values[i]).get_key();
    }
    table.add_search_index(col);

    // Each value must resolve to the object it was stored in.
    for (size_t i = 0; i < num_values; ++i) {
        ObjKey found = table.find_first_string(col, values[i]);
        CHECK_EQUAL(keys[i], found);
    }
}
4✔
982

983
TEST_TYPES(StringIndex_Zero_Crash2, std::true_type, std::false_type)
984
{
4✔
985
    Random random(random_int<unsigned long>());
4✔
986

987
    constexpr bool add_common_prefix = TEST_TYPE::value;
4✔
988

989
    for (size_t iter = 0; iter < 10 + TEST_DURATION * 100; iter++) {
44✔
990
        // StringIndex could crash if strings ended with one or more 0-bytes
991
        Table table;
40✔
992
        auto col = table.add_column(type_String, "string", true);
40✔
993

994
        table.add_search_index(col);
40✔
995

996
        for (size_t i = 0; i < 100 + TEST_DURATION * 1000; i++) {
4,040✔
997
            unsigned char action = static_cast<unsigned char>(random.draw_int_max<unsigned int>(100));
4,000✔
998
            if (action == 0) {
4,000✔
999
                table.remove_search_index(col);
44✔
1000
                table.add_search_index(col);
44✔
1001
            }
44✔
1002
            else if (action > 48 && table.size() < 10) {
3,956✔
1003
                // Generate string with equal probability of being empty, null, short, medium and long, and with
1004
                // their contents having equal proability of being either random or a duplicate of a previous
1005
                // string. When it's random, each char must have equal probability of being 0 or non-0e
1006
                static std::string buf =
2,000✔
1007
                    "This string is around 90 bytes long, which falls in the long-string type of Realm strings";
2,000✔
1008

1009
                std::string copy = buf;
2,000✔
1010

1011
                static std::string buf2 =
2,000✔
1012
                    "                                                                                         ";
2,000✔
1013
                std::string copy2 = buf2;
2,000✔
1014
                StringData sd;
2,000✔
1015

1016
                size_t len = random.draw_int_max<size_t>(3);
2,000✔
1017
                if (len == 0)
2,000✔
1018
                    len = 0;
508✔
1019
                else if (len == 1)
1,492✔
1020
                    len = 7;
464✔
1021
                else if (len == 2)
1,028✔
1022
                    len = 27;
521✔
1023
                else
507✔
1024
                    len = random.draw_int_max<size_t>(90);
507✔
1025

1026
                copy = copy.substr(0, len);
2,000✔
1027
                if (add_common_prefix) {
2,000✔
1028
                    std::string prefix(StringIndex::s_max_offset, 'a');
997✔
1029
                    copy = prefix + copy;
997✔
1030
                }
997✔
1031

1032
                if (random.draw_int_max<int>(1) == 0) {
2,000✔
1033
                    // duplicate string
1034
                    sd = StringData(copy);
1,025✔
1035
                }
1,025✔
1036
                else {
975✔
1037
                    // random string
1038
                    for (size_t t = 0; t < len; t++) {
20,644✔
1039
                        if (random.draw_int_max<int>(100) > 20)
19,669✔
1040
                            copy2[t] = 0; // zero byte
15,647✔
1041
                        else
4,022✔
1042
                            copy2[t] = static_cast<char>(random.draw_int<int>()); // random byte
4,022✔
1043
                    }
19,669✔
1044
                    // no generated string can equal "null" (our vector magic value for null) because
1045
                    // len == 4 is not possible
1046
                    copy2 = copy2.substr(0, len);
975✔
1047
                    if (add_common_prefix) {
975✔
1048
                        std::string prefix(StringIndex::s_max_offset, 'a');
472✔
1049
                        copy2 = prefix + copy2;
472✔
1050
                    }
472✔
1051
                    sd = StringData(copy2);
975✔
1052
                }
975✔
1053

1054
                bool done = false;
2,000✔
1055
                do {
2,002✔
1056
                    int64_t key_val = random.draw_int_max<int64_t>(10000);
2,002✔
1057
                    try {
2,002✔
1058
                        table.create_object(ObjKey(key_val)).set(col, sd);
2,002✔
1059
                        done = true;
2,002✔
1060
                    }
2,002✔
1061
                    catch (...) {
2,002✔
1062
                    }
2✔
1063
                } while (!done);
2,002✔
1064
                table.verify();
2,000✔
1065
            }
2,000✔
1066
            else if (table.size() > 0) {
1,956✔
1067
                // delete
1068
                size_t row = random.draw_int_max<size_t>(table.size() - 1);
1,758✔
1069
                Obj obj = table.get_object(row);
1,758✔
1070
                obj.remove();
1,758✔
1071
            }
1,758✔
1072

1073
            action = static_cast<unsigned char>(random.draw_int_max<unsigned int>(100));
4,000✔
1074
            if (table.size() > 0) {
4,000✔
1075
                // Search for value that exists
1076
                size_t row = random.draw_int_max<size_t>(table.size() - 1);
3,585✔
1077
                Obj obj = table.get_object(row);
3,585✔
1078
                StringData sd = obj.get<String>(col);
3,585✔
1079
                ObjKey t = table.find_first_string(col, sd);
3,585✔
1080
                StringData sd2 = table.get_object(t).get<String>(col);
3,585✔
1081
                CHECK_EQUAL(sd, sd2);
3,585✔
1082
            }
3,585✔
1083
        }
4,000✔
1084
    }
40✔
1085
}
4✔
1086

1087
// Fills an indexed integer column with random values and checks that
// count_int() agrees with a std::multiset holding the same values.
TEST(StringIndex_Integer_Increasing)
{
    const size_t rows = 2000 + 1000000 * TEST_DURATION;

    Table table;
    auto col = table.add_column(type_Int, "int");
    table.add_search_index(col);

    // Reference bookkeeping: every inserted value, duplicates included.
    std::multiset<int64_t> reference;

    size_t inserted = 0;
    while (inserted < rows) {
        // A small value range in the short test configuration makes duplicates likely.
        int64_t value = fastrand((TEST_DURATION == 0) ? 2000 : 0x100000);
        table.create_object().set(col, value);
        reference.insert(value);
        ++inserted;
    }

    for (auto obj : table) {
        int64_t value = obj.get<Int>(col);
        size_t counted = table.count_int(col, value);
        size_t expected = reference.count(value);
        CHECK_EQUAL(counted, expected);
    }
}
2✔
1111

1112
// Exercises SearchIndex::has_duplicate_values() while rows are added, erased,
// inserted, overwritten and cleared.
TEST_TYPES(StringIndex_Duplicate_Values, string_column, nullable_string_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    // Rows 0..3
    col.add(s1);
    col.add(s2);
    col.add(s3);
    col.add(s4);

    // Create a new index on column
    const SearchIndex& ndx = *col.create_search_index();
    CHECK(!ndx.has_duplicate_values());

    // Adding s1 a second time (row 4) creates a duplicate.
    col.add(s1);
    CHECK(ndx.has_duplicate_values());

    // Removing the second copy clears the duplicate again.
    col.erase(4);
    CHECK(!ndx.has_duplicate_values());

    // Same again, but this time the first copy (row 0) is the one removed.
    col.add(s1);
    CHECK(ndx.has_duplicate_values());
    col.erase(0);
    CHECK(!ndx.has_duplicate_values());

    // check emptied set
    col.clear();
    CHECK(ndx.is_empty());
    CHECK(!ndx.has_duplicate_values());

    const size_t num_rows = 100;

    // The decimal strings "0".."99" are all distinct.
    for (size_t value = 0; value < num_rows; ++value) {
        std::string decimal(util::to_string(value));
        col.add(decimal);
    }
    CHECK(!ndx.has_duplicate_values());

    // Runs of 'a' with lengths 1..num_rows are distinct as well.
    std::string run_of_a = "a";
    for (size_t added = 0; added < num_rows; ++added) {
        col.add(run_of_a);
        run_of_a += "a";
    }
    std::string str_num_rows(util::to_string(num_rows));
    CHECK(!ndx.has_duplicate_values());

    // run_of_a now has num_rows + 1 characters; adding it twice gives a duplicate.
    col.add(run_of_a);
    col.add(run_of_a);
    CHECK(ndx.has_duplicate_values());
    col.erase(col.size() - 1);
    CHECK(!ndx.has_duplicate_values());

    // Insert into the middle unique value of num_rows
    col.insert(num_rows / 2, str_num_rows);
    CHECK(!ndx.has_duplicate_values());

    // Set the next element to be num_rows too
    col.set(num_rows / 2 + 1, str_num_rows);
    CHECK(ndx.has_duplicate_values());

    col.clear();
    CHECK(!ndx.has_duplicate_values());
    CHECK(col.size() == 0);
}
4✔
1179

1180
// Checks duplicate handling for strings whose length is below, exactly at, and
// one above StringIndex::s_max_offset, for several numbers of duplicates around
// REALM_MAX_BPNODE_SIZE.
TEST_TYPES(StringIndex_MaxBytes, string_column, nullable_string_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();

    // Backing storage for the StringData views below.
    std::string at_limit_str(StringIndex::s_max_offset, 'a');
    std::string above_limit_str(at_limit_str + "a");
    std::string below_limit_str(StringIndex::s_max_offset >> 1, 'a');
    StringData at_limit(at_limit_str);
    StringData above_limit(above_limit_str);
    StringData below_limit(below_limit_str);

    const SearchIndex& ndx = *col.create_search_index();

    CHECK_EQUAL(col.size(), 0);

    // Adds `copies` rows holding `value` to the empty column, verifies size,
    // duplicate flag, content, count and first match, and empties the column again.
    auto duplicate_check = [&](size_t copies, StringData value) {
        CHECK(col.size() == 0);
        for (size_t added = 0; added < copies; ++added) {
            col.add(value);
        }
        CHECK_EQUAL(col.size(), copies);
        CHECK(ndx.has_duplicate_values() == (copies > 1));
        CHECK_EQUAL(col.get(0), value);
        CHECK_EQUAL(col.count(value), copies);
        CHECK_EQUAL(col.find_first(value), 0);
        col.clear();
    };

    std::vector<size_t> copy_counts = {
        1, 10, REALM_MAX_BPNODE_SIZE - 1, REALM_MAX_BPNODE_SIZE, REALM_MAX_BPNODE_SIZE + 1,
    };
    for (auto& copies : copy_counts) {
        duplicate_check(copies, below_limit);
        duplicate_check(copies, at_limit);
        duplicate_check(copies, above_limit);
    }
}
4✔
1218

1219

1220
// There is a corner case where two very long strings are
1221
// inserted into the string index which are identical except
1222
// for the characters at the end (they have an identical very
1223
// long prefix). This was causing a stack overflow because of
1224
// the recursive nature of the insert function.
1225
TEST_TYPES(StringIndex_InsertLongPrefix, string_column, nullable_string_column)
1226
{
4✔
1227
    TEST_TYPE test_resources;
4✔
1228
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
4✔
1229
    const SearchIndex& ndx = *col.create_search_index();
4✔
1230

1231
    col.add("test_index_string1");
4✔
1232
    col.add("test_index_string2");
4✔
1233

1234
    CHECK_EQUAL(col.find_first("test_index_string1"), 0);
4✔
1235
    CHECK_EQUAL(col.find_first("test_index_string2"), 1);
4✔
1236

1237
    std::string std_base(107, 'a');
4✔
1238
    std::string std_base_b = std_base + "b";
4✔
1239
    std::string std_base_c = std_base + "c";
4✔
1240
    StringData base_b(std_base_b);
4✔
1241
    StringData base_c(std_base_c);
4✔
1242
    col.add(base_b);
4✔
1243
    ndx.verify();
4✔
1244
    col.add(base_c);
4✔
1245
    ndx.verify();
4✔
1246

1247
    CHECK_EQUAL(col.find_first(base_b), 2);
4✔
1248
    CHECK_EQUAL(col.find_first(base_c), 3);
4✔
1249

1250
    // To trigger the bug, the length must be more than 10000.
1251
    // Array::destroy_deep() will stack overflow at around recursion depths of
1252
    // lengths > 90000 on mac and less on android devices.
1253
    std::string std_base2(100000, 'a');
4✔
1254
    std::string std_base2_b = std_base2 + "b";
4✔
1255
    std::string std_base2_c = std_base2 + "c";
4✔
1256
    StringData base2(std_base2);
4✔
1257
    StringData base2_b(std_base2_b);
4✔
1258
    StringData base2_c(std_base2_c);
4✔
1259
    col.add(base2_b);
4✔
1260
    ndx.verify();
4✔
1261
    col.add(base2_c);
4✔
1262
    ndx.verify();
4✔
1263

1264
    CHECK_EQUAL(col.find_first(base2_b), 4);
4✔
1265
    CHECK_EQUAL(col.find_first(base2_c), 5);
4✔
1266

1267
    col.add(base2);
4✔
1268
    CHECK(!ndx.has_duplicate_values());
4✔
1269
    ndx.verify();
4✔
1270
    col.add(base2_b); // adds a duplicate in the middle of the list
4✔
1271

1272
    CHECK(ndx.has_duplicate_values());
4✔
1273
    std::vector<ObjKey> find_all_result;
4✔
1274
    CHECK_EQUAL(col.find_first(base2_b), 4);
4✔
1275
    ndx.find_all(find_all_result, base2_b);
4✔
1276
    CHECK_EQUAL(find_all_result.size(), 2);
4✔
1277
    CHECK_EQUAL(find_all_result[0], col.key(4));
4✔
1278
    CHECK_EQUAL(find_all_result[1], col.key(7));
4✔
1279
    find_all_result.clear();
4✔
1280
    CHECK_EQUAL(ndx.count(base2_b), 2);
4✔
1281
    col.verify();
4✔
1282

1283
    col.erase(7);
4✔
1284
    CHECK_EQUAL(col.find_first(base2_b), 4);
4✔
1285
    CHECK_EQUAL(ndx.count(base2_b), 1);
4✔
1286
    ndx.find_all(find_all_result, base2_b);
4✔
1287
    CHECK_EQUAL(find_all_result.size(), 1);
4✔
1288
    CHECK_EQUAL(find_all_result[0], col.key(4));
4✔
1289
    find_all_result.clear();
4✔
1290
    col.verify();
4✔
1291

1292
    col.set(6, base2_b);
4✔
1293
    CHECK_EQUAL(ndx.count(base2_b), 2);
4✔
1294
    CHECK_EQUAL(col.find_first(base2_b), 4);
4✔
1295
    ndx.find_all(find_all_result, base2_b);
4✔
1296
    CHECK_EQUAL(find_all_result.size(), 2);
4✔
1297
    CHECK_EQUAL(find_all_result[0], col.key(4));
4✔
1298
    CHECK_EQUAL(find_all_result[1], col.key(6));
4✔
1299
    col.verify();
4✔
1300

1301
    col.clear(); // calls recursive function Array::destroy_deep()
4✔
1302
}
4✔
1303

1304
// Queries an indexed string column whose values all share a prefix of
// StringIndex::s_max_offset characters, including needles that are absent but
// sort just below / just above the stored values.
TEST_TYPES(StringIndex_InsertLongPrefixAndQuery, string_column, nullable_string_column)
{
    constexpr int half_node_size = REALM_MAX_BPNODE_SIZE / 2;
    bool nullable_column = TEST_TYPE::is_nullable();
    Group g;
    auto t = g.add_table("StringsOnly");
    auto col = t->add_column(type_String, "first", nullable_column);
    t->add_search_index(col);

    // All values start with the same long run of 'a'.
    std::string prefix(StringIndex::s_max_offset, 'a');
    std::string str_a = prefix + "aaaaa";
    std::string str_a0 = prefix + "aaaa0";
    std::string str_ax = prefix + "aaaax";
    std::string str_b = prefix + "bbbbb";
    std::string str_c = prefix + "ccccc";
    std::string str_c0 = prefix + "cccc0"; // never stored
    std::string str_cx = prefix + "ccccx"; // never stored

    // a, b, c interleaved; the first object created holds str_a.
    for (int round = 0; round < half_node_size * 3; round++) {
        t->create_object().set(col, str_a);
        t->create_object().set(col, str_b);
        t->create_object().set(col, str_c);
    }
    t->create_object().set(col, str_ax);
    t->create_object().set(col, str_ax);
    t->create_object().set(col, str_a0);
    /*
    {
        std::ofstream o("index.dot");
        index->to_dot(o, "");
    }
    */

    // Lookups of a stored value.
    auto first_key = t->where().equal(col, StringData(str_a)).find();
    auto cnt = t->count_string(col, StringData(str_a));
    auto view = t->where().equal(col, StringData(str_a)).find_all();
    CHECK_EQUAL(first_key, ObjKey(0));
    CHECK_EQUAL(cnt, half_node_size * 3);
    CHECK_EQUAL(view.size(), half_node_size * 3);

    // find() on values that were never stored.
    first_key = t->where().equal(col, StringData(str_c0)).find();
    CHECK_EQUAL(first_key, null_key);
    first_key = t->where().equal(col, StringData(str_cx)).find();
    CHECK_EQUAL(first_key, null_key);

    // Find string that is 'less' than strings in the table, but with identical last key
    view = t->where().equal(col, StringData(str_c0)).find_all();
    CHECK_EQUAL(view.size(), 0);
    // Find string that is 'greater' than strings in the table, but with identical last key
    view = t->where().equal(col, StringData(str_cx)).find_all();
    CHECK_EQUAL(view.size(), 0);

    // Same as above, but just for 'count' method
    cnt = t->count_string(col, StringData(str_c0));
    CHECK_EQUAL(cnt, 0);
    cnt = t->count_string(col, StringData(str_cx));
    CHECK_EQUAL(cnt, 0);
}
4✔
1360

1361

1362
// Fuzz test comparing an indexed string column ("first") with an identical
// column that has no index ("second"): equality queries on both must return the
// same objects.
TEST(StringIndex_Fuzzy)
{
    constexpr size_t chunkcount = 50;
    constexpr size_t rowcount = 100 + 1000 * TEST_DURATION;

    for (size_t main_rounds = 0; main_rounds < 2 + 10 * TEST_DURATION; main_rounds++) {

        Group g;

        auto t = g.add_table("StringsOnly");
        auto col0 = t->add_column(type_String, "first");
        auto col1 = t->add_column(type_String, "second");

        t->add_search_index(col0);

        // Pool of random chunks from which all test strings are assembled.
        std::string strings[chunkcount];

        for (size_t j = 0; j < chunkcount; j++) {
            size_t len = fastrand() % REALM_MAX_BPNODE_SIZE;

            for (size_t i = 0; i < len; i++)
                strings[j] += char(fastrand());
        }

        // Builds a string out of 0..3 random chunks from the pool.
        // Strings consisting of 2 concatenated strings are very interesting,
        // so that count is chosen half of the time.
        auto random_concatenation = [&]() {
            size_t chunks;
            if (fastrand() % 2 == 0)
                chunks = fastrand() % 4;
            else
                chunks = 2;

            std::string result;
            for (size_t c = 0; c < chunks; c++) {
                result += strings[fastrand() % chunkcount];
            }
            return result;
        };

        // Asserts that two views contain the same keys in the same order.
        auto check_same_results = [&](TableView& tv0, TableView& tv1) {
            CHECK_EQUAL(tv0.size(), tv1.size());

            for (size_t v = 0; v < tv0.size(); v++) {
                CHECK_EQUAL(tv0.get_key(v), tv1.get_key(v));
            }
        };

        for (size_t rows = 0; rows < rowcount; rows++) {
            std::string str = random_concatenation();
            t->create_object().set_all(str, str);
        }

        for (size_t rounds = 0; rounds < 1 + 10 * TEST_DURATION; rounds++) {
            // Query for every value that is present in the table.
            for (auto obj : *t) {
                TableView tv0 = (t->column<String>(col0) == obj.get<String>(col0)).find_all();
                TableView tv1 = (t->column<String>(col1) == obj.get<String>(col1)).find_all();
                check_same_results(tv0, tv1);
            }

            // Query for freshly generated values, which may or may not be present.
            for (size_t r = 0; r < 5 + 1000 * TEST_DURATION; r++) {
                std::string str = random_concatenation();

                TableView tv0 = (t->column<String>(col0) == str).find_all();
                TableView tv1 = (t->column<String>(col1) == str).find_all();
                check_same_results(tv0, tv1);
            }

            // Mutate the table a little before the next round.
            if (t->size() > 10)
                t->get_object(0).remove();

            size_t r1 = fastrand() % t->size();
            size_t r2 = fastrand() % t->size();

            // Copy the value of row r2 into both columns of row r1.
            std::string str = t->get_object(r2).get<String>(col0);
            Obj obj = t->get_object(r1);
            obj.set<String>(col0, StringData(str));
            obj.set<String>(col1, StringData(str));
        }
    }
}
2✔
1452

1453
namespace {
1454

1455
// results returned by the index should be in ascending row order
1456
// this requirement is assumed by the query system which runs find_gte
1457
// and this will return wrong results unless the results are ordered
1458
void check_result_order(const std::vector<ObjKey>& results, TestContext& test_context)
1459
{
44✔
1460
    const size_t num_results = results.size();
44✔
1461
    for (size_t i = 1; i < num_results; ++i) {
144✔
1462
        CHECK(results[i - 1] < results[i]);
100✔
1463
    }
100✔
1464
}
44✔
1465

1466
} // end anonymous namespace
1467

1468

1469
TEST_TYPES(StringIndex_Insensitive, string_column, nullable_string_column)
1470
{
4✔
1471
    TEST_TYPE test_resources;
4✔
1472
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
4✔
1473

1474
    const char* strings[] = {"john",
4✔
1475
                             "John",
4✔
1476
                             "jOhn",
4✔
1477
                             "JOhn",
4✔
1478
                             "joHn",
4✔
1479
                             "JoHn",
4✔
1480
                             "jOHn",
4✔
1481
                             "JOHn",
4✔
1482
                             "johN",
4✔
1483
                             "JohN",
4✔
1484
                             "jOhN",
4✔
1485
                             "JOhN",
4✔
1486
                             "joHN",
4✔
1487
                             "JoHN",
4✔
1488
                             "jOHN",
4✔
1489
                             "JOHN",
4✔
1490
                             "john" /* yes, an extra to test the "bucket" case as well */,
4✔
1491
                             "hans",
4✔
1492
                             "Hansapark",
4✔
1493
                             "george",
4✔
1494
                             "billion dollar startup",
4✔
1495
                             "abcde",
4✔
1496
                             "abcdE",
4✔
1497
                             "Abcde",
4✔
1498
                             "AbcdE",
4✔
1499
                             "common",
4✔
1500
                             "common"};
4✔
1501

1502
    for (const char* string : strings) {
108✔
1503
        col.add(string);
108✔
1504
    }
108✔
1505

1506
    // Generate 255 strings with 1..255 'a' chars
1507
    for (int i = 1; i < 256; ++i) {
1,024✔
1508
        col.add(std::string(i, 'a').c_str());
1,020✔
1509
    }
1,020✔
1510

1511
    // Create a new index on column
1512
    const SearchIndex& ndx = *col.create_search_index();
4✔
1513

1514
    std::vector<ObjKey> results;
4✔
1515
    {
4✔
1516
        // case sensitive
1517
        ndx.find_all(results, strings[0]);
4✔
1518
        CHECK_EQUAL(2, results.size());
4✔
1519
        CHECK_EQUAL(col.get(results[0]), strings[0]);
4✔
1520
        CHECK_EQUAL(col.get(results[1]), strings[0]);
4✔
1521
        check_result_order(results, test_context);
4✔
1522
        results.clear();
4✔
1523
    }
4✔
1524

1525
    {
4✔
1526
        constexpr bool case_insensitive = true;
4✔
1527
        const char* needle = "john";
4✔
1528
        auto upper_needle = case_map(needle, true);
4✔
1529
        ndx.find_all(results, needle, case_insensitive);
4✔
1530
        CHECK_EQUAL(17, results.size());
4✔
1531
        for (size_t i = 0; i < results.size(); ++i) {
72✔
1532
            auto upper_result = case_map(col.get(results[i]), true);
68✔
1533
            CHECK_EQUAL(upper_result, upper_needle);
68✔
1534
        }
68✔
1535
        check_result_order(results, test_context);
4✔
1536
        results.clear();
4✔
1537
    }
4✔
1538

1539

1540
    {
4✔
1541
        struct TestData {
4✔
1542
            const bool case_insensitive;
4✔
1543
            const char* const needle;
4✔
1544
            const size_t result_size;
4✔
1545
        };
4✔
1546

1547
        TestData td[] = {
4✔
1548
            {true, "Hans", 1},
4✔
1549
            {true, "Geor", 0},
4✔
1550
            {true, "George", 1},
4✔
1551
            {true, "geoRge", 1},
4✔
1552
            {true, "Billion Dollar Startup", 1},
4✔
1553
            {true, "ABCDE", 4},
4✔
1554
            {true, "commON", 2},
4✔
1555
        };
4✔
1556

1557
        for (const TestData& t : td) {
28✔
1558
            ndx.find_all(results, t.needle, t.case_insensitive);
28✔
1559
            CHECK_EQUAL(t.result_size, results.size());
28✔
1560
            check_result_order(results, test_context);
28✔
1561
            results.clear();
28✔
1562
        }
28✔
1563
    }
4✔
1564

1565
    // Test generated 'a'-strings
1566
    for (int i = 1; i < 256; ++i) {
1,024✔
1567
        const std::string str = std::string(i, 'A');
1,020✔
1568
        ndx.find_all(results, str.c_str(), false);
1,020✔
1569
        CHECK_EQUAL(0, results.size());
1,020✔
1570
        ndx.find_all(results, str.c_str(), true);
1,020✔
1571
        CHECK_EQUAL(1, results.size());
1,020✔
1572
        results.clear();
1,020✔
1573
    }
1,020✔
1574
}
4✔
1575

1576

1577
/* Disabled until we have better support for case mapping unicode characters
1578

1579
TEST_TYPES(StringIndex_Insensitive_Unicode, non_nullable, nullable)
1580
{
1581
    constexpr bool nullable = TEST_TYPE::value;
1582

1583
    // Create a column with string values
1584
    ref_type ref = StringColumn::create(Allocator::get_default());
1585
    StringColumn col(Allocator::get_default(), ref, nullable);
1586

1587
    const char* strings[] = {
1588
        "æøå", "ÆØÅ",
1589
    };
1590

1591
    for (const char* string : strings) {
1592
        col.add(string);
1593
    }
1594

1595
    // Create a new index on column
1596
    const SearchIndex& ndx = *col.create_search_index();
1597

1598
    ref_type results_ref = IntegerColumn::create(Allocator::get_default());
1599
    IntegerColumn results(Allocator::get_default(), results_ref);
1600

1601
    {
1602
        struct TestData {
1603
            const bool case_insensitive;
1604
            const char* const needle;
1605
            const size_t result_size;
1606
        };
1607

1608
        TestData td[] = {
1609
            {false, "æøå", 1},
1610
            {false, "ÆØÅ", 1},
1611
            {true, "æøå", 2},
1612
            {true, "Æøå", 2},
1613
            {true, "æØå", 2},
1614
            {true, "ÆØå", 2},
1615
            {true, "æøÅ", 2},
1616
            {true, "ÆøÅ", 2},
1617
            {true, "æØÅ", 2},
1618
            {true, "ÆØÅ", 2},
1619
        };
1620

1621
        for (const TestData& t : td) {
1622
            ndx.find_all(results, t.needle, t.case_insensitive);
1623
            CHECK_EQUAL(t.result_size, results.size());
1624
            results.clear();
1625
        }
1626
    }
1627

1628
    // Clean up
1629
    results.destroy();
1630
    col.destroy();
1631
}
1632

1633
*/
1634

1635

1636
// A case-insensitive search for a five-character needle ("AAAAA") must not
// match stored four-character values ("aaaa").
TEST_TYPES(StringIndex_45, string_column, nullable_string_column)
{
    TEST_TYPE test_resources;
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
    const SearchIndex& ndx = *col.create_search_index();

    std::string stored_value = std::string(4, 'a');
    std::string longer_needle = std::string(5, 'A');

    // The value is stored twice.
    col.add(stored_value);
    col.add(stored_value);

    std::vector<ObjKey> matches;
    ndx.find_all(matches, longer_needle.c_str(), true);
    CHECK_EQUAL(matches.size(), 0);
}
4✔
1652

1653

1654
namespace {
1655

1656
std::string create_random_a_string(size_t max_len)
1657
{
×
1658
    std::string s;
×
1659
    size_t len = size_t(fastrand(max_len));
×
1660
    for (size_t p = 0; p < len; p++) {
×
1661
        s += fastrand(1) == 0 ? 'a' : 'A';
×
1662
    }
×
1663
    return s;
×
1664
}
×
1665

1666
} // namespace
1667

1668

1669
// Excluded when run with valgrind because it takes a long time
1670
TEST_TYPES_IF(StringIndex_Insensitive_Fuzz, TEST_DURATION > 1, string_column, nullable_string_column)
UNCOV
1671
{
×
1672
    const size_t max_str_len = 9;
×
1673
    const size_t iters = 3;
×
1674

1675
    for (size_t iter = 0; iter < iters; iter++) {
×
1676
        TEST_TYPE test_resources;
×
1677
        typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
×
1678

1679
        size_t rows = size_t(fastrand(2 * REALM_MAX_BPNODE_SIZE - 1));
×
1680

1681
        // Add 'rows' number of rows in the column
1682
        for (size_t t = 0; t < rows; t++) {
×
1683
            std::string str = create_random_a_string(max_str_len);
×
1684
            col.add(str);
×
1685
        }
×
1686

1687
        const SearchIndex& ndx = *col.create_search_index();
×
1688

1689
        for (size_t t = 0; t < 1000; t++) {
×
1690
            std::string needle = create_random_a_string(max_str_len);
×
1691

1692
            std::vector<ObjKey> res;
×
1693

1694
            ndx.find_all(res, needle.c_str(), true);
×
1695
            check_result_order(res, test_context);
×
1696

1697
            // Check that all items in 'res' point at a match in 'col'
1698
            auto needle_upper = case_map(needle, true);
×
1699
            for (size_t res_ndx = 0; res_ndx < res.size(); res_ndx++) {
×
1700
                auto res_upper = case_map(col.get(res[res_ndx]), true);
×
1701
                CHECK_EQUAL(res_upper, needle_upper);
×
1702
            }
×
1703

1704
            // Check that all matches in 'col' exist in 'res'
1705
            for (size_t col_ndx = 0; col_ndx < col.size(); col_ndx++) {
×
1706
                auto str_upper = case_map(col.get(col_ndx), true);
×
1707
                if (str_upper == needle_upper) {
×
1708
                    CHECK(std::find(res.begin(), res.end(), col.key(col_ndx)) != res.end());
×
1709
                }
×
1710
            }
×
1711
        }
×
1712
    }
×
1713
}
×
1714

1715
// Exercise the StringIndex case insensitive search for strings with very long, common prefixes
1716
// to cover the special case code paths where different strings are stored in a list.
1717
TEST_TYPES(StringIndex_Insensitive_VeryLongStrings, string_column, nullable_string_column)
1718
{
4✔
1719
    TEST_TYPE test_resources;
4✔
1720
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
4✔
1721
    const SearchIndex& ndx = *col.create_search_index();
4✔
1722

1723
    std::string long1 = std::string(StringIndex::s_max_offset + 10, 'a');
4✔
1724
    std::string long2 = long1 + "b";
4✔
1725
    std::string long3 = long1 + "c";
4✔
1726

1727
    // Add the strings in a "random" order
1728
    col.add(long1);
4✔
1729
    col.add(long2);
4✔
1730
    col.add(long2);
4✔
1731
    col.add(long1);
4✔
1732
    col.add(long3);
4✔
1733
    col.add(long2);
4✔
1734
    col.add(long1);
4✔
1735
    col.add(long1);
4✔
1736

1737
    std::vector<ObjKey> results;
4✔
1738

1739
    ndx.find_all(results, long1.c_str(), true);
4✔
1740
    CHECK_EQUAL(results.size(), 4);
4✔
1741
    check_result_order(results, test_context);
4✔
1742
    results.clear();
4✔
1743
    ndx.find_all(results, long2.c_str(), true);
4✔
1744
    CHECK_EQUAL(results.size(), 3);
4✔
1745
    results.clear();
4✔
1746
    ndx.find_all(results, long3.c_str(), true);
4✔
1747
    CHECK_EQUAL(results.size(), 1);
4✔
1748
    results.clear();
4✔
1749
}
4✔
1750

1751

1752
// Bug with case insensitive search on numbers that gives duplicate results
1753
TEST_TYPES(StringIndex_Insensitive_Numbers, string_column, nullable_string_column)
1754
{
4✔
1755
    TEST_TYPE test_resources;
4✔
1756
    typename TEST_TYPE::ColumnTestType& col = test_resources.get_column();
4✔
1757
    const SearchIndex& ndx = *col.create_search_index();
4✔
1758

1759
    constexpr const char* number_string_16 = "1111111111111111";
4✔
1760
    constexpr const char* number_string_17 = "11111111111111111";
4✔
1761

1762
    col.add(number_string_16);
4✔
1763
    col.add(number_string_17);
4✔
1764

1765
    std::vector<ObjKey> results;
4✔
1766

1767
    ndx.find_all(results, number_string_16, true);
4✔
1768
    CHECK_EQUAL(results.size(), 1);
4✔
1769
}
4✔
1770

1771

1772
// A case-insensitive search for "rover" must find both "ROVER" and "Rover", and the
// returned keys must pass check_result_order().
TEST_TYPES(StringIndex_Rover, string_column, nullable_string_column)
{
    TEST_TYPE fixture;
    typename TEST_TYPE::ColumnTestType& column = fixture.get_column();
    const SearchIndex& index = *column.create_search_index();

    // Same word, two different casings
    column.add("ROVER");
    column.add("Rover");

    std::vector<ObjKey> hits;
    index.find_all(hits, "rover", true);

    CHECK_EQUAL(hits.size(), 2);
    check_result_order(hits, test_context);
}
4✔
1788

1789
// Queries a table with a string primary key that holds a single object: an equality query
// for the stored key counts one object, a query for a different key counts none.
TEST(StringIndex_QuerySingleObject)
{
    Group group;
    auto string_class = group.add_table_with_primary_key("class_StringClass", type_String, "name", true);
    string_class->create_object_with_primary_key("Foo");

    auto present = string_class->where().equal(string_class->get_column_key("name"), "Foo", true);
    CHECK_EQUAL(present.count(), 1);

    auto absent = string_class->where().equal(string_class->get_column_key("name"), "Bar", true);
    CHECK_EQUAL(absent.count(), 0);
}
2✔
1800

1801
// Adds a search index to a Mixed column after an object with a string value already exists.
// The test has no explicit checks; it only requires that building the index completes.
TEST(StringIndex_MixedNonEmptyTable)
{
    Group group;
    auto foo = group.add_table("foo");
    auto any_col = foo->add_column(type_Mixed, "any");

    // Populate first ...
    auto obj = foo->create_object();
    obj.set(any_col, Mixed("abcdefgh"));

    // ... then index the already populated column
    foo->add_search_index(any_col);
}
2✔
1809

1810
// Checks that a string query on an indexed Mixed column follows an object whose value is
// switched from a string to a dictionary and then back to the string.
TEST(StringIndex_MixedWithNestedCollections)
{
    Group group;
    auto foo = group.add_table("foo");
    auto value_col = foo->add_column(type_Mixed, "value");
    foo->add_search_index(value_col);

    foo->create_object().set(value_col, Mixed("apple"));
    auto banana = foo->create_object();
    banana.set(value_col, Mixed("banana"));

    auto banana_query = foo->query("value = 'banana'");
    CHECK_EQUAL(banana_query.count(), 1);

    // Once the value is a dictionary the object must no longer match
    banana.set_collection(value_col, CollectionType::Dictionary);
    CHECK_EQUAL(banana_query.count(), 0);

    // Writing the string again must bring the match back
    banana.set(value_col, Mixed("banana"));
    CHECK_EQUAL(banana_query.count(), 1);
}
2✔
1828

1829
// An indexed Mixed column holding the string "abcdefgh" together with the integer
// 0x6867666564636261, whose bytes read least-significant first are the characters 'a'..'h'.
// Equality queries must keep the two values apart.
TEST(StringIndex_MixedEqualBitPattern)
{
    Group group;
    auto foo = group.add_table("foo");
    auto any_col = foo->add_column(type_Mixed, "any");
    foo->add_search_index(any_col);

    Mixed int_value(int64_t(0x6867666564636261));
    Mixed text_value("abcdefgh");

    foo->create_object().set(any_col, text_value);
    // Original note: from single value to list
    foo->create_object().set(any_col, int_value);

    auto view = foo->where().equal(any_col, int_value).find_all();
    CHECK_EQUAL(view.size(), 1);
    CHECK_EQUAL(view.get_object(0).get_any(any_col), int_value);

    foo->clear();
    for (int copy = 0; copy < 2; ++copy) {
        foo->create_object().set(any_col, text_value);
    }
    // Original note: insert in existing list
    foo->create_object().set(any_col, int_value);

    view = foo->where().equal(any_col, int_value).find_all();
    CHECK_EQUAL(view.size(), 1);
    CHECK_EQUAL(view.get_object(0).get_any(any_col), int_value);
    view = foo->where().equal(any_col, text_value).find_all();
    CHECK_EQUAL(view.size(), 2);

    // Original note: add another one into existing list
    foo->create_object().set(any_col, int_value);
    view = foo->where().equal(any_col, int_value).find_all();
    CHECK_EQUAL(view.size(), 2);
    for (size_t row = 0; row < 2; ++row) {
        CHECK_EQUAL(view.get_object(row).get_any(any_col), int_value);
    }
}
2✔
1864

1865
// Exercises case_map() on non-ASCII text:
//  - Latin-1 letters are mapped to lower case and back again,
//  - text containing emoji is upper-cased with the emoji left untouched,
//  - input that is cut off in the middle of a multi-byte character yields no result,
//  - a Danish phrase is upper-cased and mapped back to the original.
//
// Each follow-up mapping that consumes a previous result is nested inside the check of that
// result, so an empty optional is never dereferenced when an earlier step fails.
TEST(Unicode_Casemap)
{
    std::string inp = "±ÀÁÂÃÄÅÆÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝß×÷";
    auto out = case_map(inp, false);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "±àáâãäåæèéêëìíîïñòóôõöøùúûüýß×÷");
        // Round trip: mapping the lower-case result up again must reproduce the input
        out = case_map(*out, true);
        if (CHECK(out)) {
            CHECK_EQUAL(*out, inp);
        }
    }

    inp = "A very old house 🏠 is on 🔥, we have to save the 🦄";
    out = case_map(inp, true);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "A VERY OLD HOUSE 🏠 IS ON 🔥, WE HAVE TO SAVE THE 🦄");
    }

    StringData trailing_garbage(inp.data(), 19); // String terminated inside icon
    out = case_map(trailing_garbage, true);
    CHECK_NOT(out);

    inp = "rødgrød med fløde";
    out = case_map(inp, true);
    if (CHECK(out)) {
        CHECK_EQUAL(*out, "RØDGRØD MED FLØDE");
        // Round trip back to lower case
        out = case_map(*out, false);
        if (CHECK(out)) {
            CHECK_EQUAL(*out, inp);
        }
    }
}
2✔
1897

1898
// Returns a string of `length` characters drawn uniformly from the digits and the lower- and
// upper-case ASCII letters. The generator is per-thread and seeded from std::random_device
// on first use.
static std::string random_string(std::string::size_type length)
{
    static constexpr char alphabet[] = "0123456789"
                                       "abcdefghijklmnopqrstuvwxyz"
                                       "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    // sizeof counts the terminating NUL, so the last usable index is sizeof - 2
    constexpr std::string::size_type last_index = sizeof(alphabet) - 2;

    thread_local static std::mt19937 engine{std::random_device{}()};
    thread_local static std::uniform_int_distribution<std::string::size_type> choose(0, last_index);

    std::string result;
    result.reserve(length);
    for (std::string::size_type produced = 0; produced < length; ++produced) {
        result.push_back(alphabet[choose(engine)]);
    }
    return result;
}
60,000✔
1916

1917
// Fills a string-list column with 10000 x 3 random 14-character strings and checks that a
// query for one remembered string finds exactly one object, both before and after a search
// index has been added to the column. The commented-out lines are optional timing output.
TEST(StringIndex_ListOfRandomStrings)
{
    using namespace std::chrono;

    SHARED_GROUP_TEST_PATH(path);
    auto db = DB::create(path);
    auto writer = db->start_write();

    auto foo = writer->add_table_with_primary_key("foo", type_Int, "_id");
    ColKey codes_col = foo->add_column_list(type_String, "codes");

    // The first string of object 5000 is kept as the search target
    std::string target;
    for (size_t id = 0; id != 10000; ++id) {
        auto obj = foo->create_object_with_primary_key(int64_t(id));
        auto codes = obj.get_list<String>(codes_col);
        for (size_t slot = 0; slot != 3; ++slot) {
            std::string code(random_string(14));
            if (id == 5000 && slot == 0) {
                target = code;
            }
            codes.add(StringData(code));
        }
    }

    std::vector<Mixed> arguments{Mixed(target)};
    auto q = writer->get_table("foo")->query("codes = $0", arguments);

    // auto t1 = steady_clock::now();
    auto tv = q.find_all();
    // auto t2 = steady_clock::now();
    // std::cout << "time without index: " << duration_cast<microseconds>(t2 - t1).count() << " us" << std::endl;
    CHECK_EQUAL(tv.size(), 1);

    foo->add_search_index(codes_col);

    // t1 = steady_clock::now();
    tv = q.find_all();
    // t2 = steady_clock::now();
    // std::cout << "time with index: " << duration_cast<microseconds>(t2 - t1).count() << " us" << std::endl;
    CHECK_EQUAL(tv.size(), 1);

    // Requesting the index a second time, as the original test does
    foo->add_search_index(codes_col);

    // std::cout << tv.get_object(0).get<Int>("_id") << std::endl;
}
2✔
1959

1960
// Queries a nullable list-of-strings column while the lists are modified, once with a search
// index on the column (std::true_type) and once without (std::false_type). Both variants
// must produce the same counts; only the final over-long insert differs.
TEST_TYPES(StringIndex_ListOfStrings, std::true_type, std::false_type)
{
    constexpr bool add_index = TEST_TYPE::value;
    Group g;

    auto t = g.add_table("foo");
    ColKey col = t->add_column_list(type_String, "names", true);
    if constexpr (add_index) {
        t->add_search_index(col);
    }

    auto obj1 = t->create_object();
    auto obj2 = t->create_object();
    auto obj3 = t->create_object();

    // obj2 and obj3: ["Johnny", "John"]
    for (Obj* obj : {&obj2, &obj3}) {
        auto list = obj->get_list<String>(col);
        list.add("Johnny");
        list.add("John");
    }

    // obj1: ["Johnny", "John", "Ivan", "Ivan", null]
    auto list = obj1.get_list<String>(col);
    list.add("Johnny");
    list.add("John");
    list.add("Ivan");
    list.add("Ivan");
    list.add(StringData());

    CHECK_EQUAL(t->query(R"(names = "John")").count(), 3);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 3);
    CHECK_EQUAL(t->query(R"(names = NULL)").count(), 1);

    // Overwrite an element: obj1 no longer contains "Johnny"
    list.set(0, "Paul");
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 3);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Paul")").count(), 1);

    // Remove obj1's "John"; the duplicated "Ivan" still counts as a single matching object
    list.remove(1);
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Paul")").count(), 1);
    CHECK_EQUAL(t->query(R"(names = "Ivan")").count(), 1);

    // Empty obj1's list entirely
    list.clear();
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 2);
    CHECK_EQUAL(t->query(R"(names = "Paul")").count(), 0);

    // Removing a whole object must drop all of its list entries from the results
    list = obj2.get_list<String>(col);
    list.insert(0, "Adam");
    list.insert(0, "Adam");
    obj2.remove();
    CHECK_EQUAL(t->query(R"(names = "John")").count(), 1);
    CHECK_EQUAL(t->query(R"(names = "Johnny")").count(), 1);

    std::string long1 = std::string(StringIndex::s_max_offset, 'a');
    std::string long2 = long1 + "b";

    // A string of exactly s_max_offset characters is accepted; with an index present, one
    // character more is expected to throw.
    list = obj1.get_list<String>(col);
    list.add(long1);
    if constexpr (add_index) {
        CHECK_THROW_ANY(list.add(long2));
    }
}
4✔
2024

2025
#endif // TEST_INDEX_STRING
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc