tarantool / tarantool — build 9054 (push, travis-ci), pending completion

kostja — test: update results
Update test results (changes not committed in scope of gh-2663).

Build totals: 30513 of 35273 relevant lines covered (86.51%), 1132512.44 hits per line

Source File
/src/box/vy_range.c — 90.37% of relevant lines covered

/*
 * Copyright 2010-2017, Tarantool AUTHORS, please see AUTHORS file.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * 1. Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the
 *    following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * AUTHORS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "vy_range.h"

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RB_COMPACT 1
#include <small/rb.h>
#include <small/rlist.h>

#include "diag.h"
#include "iterator_type.h"
#include "key_def.h"
#include "trivia/util.h"
#include "tuple.h"
#include "tuple_compare.h"
#include "vy_run.h"
#include "vy_stat.h"
#include "vy_stmt.h"

int
vy_range_tree_cmp(struct vy_range *range_a, struct vy_range *range_b)
{
        if (range_a == range_b)
                return 0;

        /* Any key > -inf. */
        if (range_a->begin == NULL)
                return -1;
        if (range_b->begin == NULL)
                return 1;

        assert(range_a->cmp_def == range_b->cmp_def);
        return vy_key_compare(range_a->begin, range_b->begin,
                              range_a->cmp_def);
}

int
vy_range_tree_key_cmp(const struct tuple *stmt, struct vy_range *range)
{
        /* Any key > -inf. */
        if (range->begin == NULL)
                return 1;
        return vy_stmt_compare_with_key(stmt, range->begin, range->cmp_def);
}

struct vy_range *
vy_range_tree_find_by_key(vy_range_tree_t *tree,
                          enum iterator_type iterator_type,
                          const struct tuple *key)
{
        uint32_t key_field_count = tuple_field_count(key);
        if (key_field_count == 0) {
                switch (iterator_type) {
                case ITER_LT:
                case ITER_LE:
                        return vy_range_tree_last(tree);
                case ITER_GT:
                case ITER_GE:
                case ITER_EQ:
                        return vy_range_tree_first(tree);
                default:
                        unreachable();
                        return NULL;
                }
        }
        struct vy_range *range;
        if (iterator_type == ITER_GE || iterator_type == ITER_GT ||
            iterator_type == ITER_EQ) {
                /**
                 * Case 1. part_count == 1, looking for [10]. ranges:
                 * {1, 3, 5} {7, 8, 9} {10, 15 20} {22, 32, 42}
                 *                      ^looking for this
                 * Case 2. part_count == 1, looking for [10]. ranges:
                 * {1, 2, 4} {5, 6, 7, 8} {50, 100, 200}
                 *            ^looking for this
                 * Case 3. part_count == 2, looking for [10]. ranges:
                 * {[1, 2], [2, 3]} {[9, 1], [10, 1], [10 2], [11 3]} {[12,..}
                 *                   ^looking for this
                 * Case 4. part_count == 2, looking for [10]. ranges:
                 * {[1, 2], [10, 1]} {[10, 2] [10 3] [11 3]} {[12, 1]..}
                 *  ^looking for this
                 * Case 5. part_count does not matter, looking for [10].
                 * ranges:
                 * {100, 200}, {300, 400}
                 * ^looking for this
                 */
                /**
                 * vy_range_tree_psearch finds the least range with
                 * begin == key, or the previous one if an equal range
                 * was not found.
                 */
                range = vy_range_tree_psearch(tree, key);
                /* switch to previous for case (4) */
                if (range != NULL && range->begin != NULL &&
                    key_field_count < range->cmp_def->part_count &&
                    vy_stmt_compare_with_key(key, range->begin,
                                             range->cmp_def) == 0)
                        range = vy_range_tree_prev(tree, range);
                /* for case 5 or subcase of case 4 */
                if (range == NULL)
                        range = vy_range_tree_first(tree);
        } else {
                assert(iterator_type == ITER_LT || iterator_type == ITER_LE);
                /**
                 * Case 1. part_count == 1, looking for [10]. ranges:
                 * {1, 3, 5} {7, 8, 9} {10, 15 20} {22, 32, 42}
                 *                      ^looking for this
                 * Case 2. part_count == 1, looking for [10]. ranges:
                 * {1, 2, 4} {5, 6, 7, 8} {50, 100, 200}
                 *            ^looking for this
                 * Case 3. part_count == 2, looking for [10]. ranges:
                 * {[1, 2], [2, 3]} {[9, 1], [10, 1], [10 2], [11 3]} {[12,..}
                 *                   ^looking for this
                 * Case 4. part_count == 2, looking for [10]. ranges:
                 * {[1, 2], [10, 1]} {[10, 2] [10 3] [11 3]} {[12, 1]..}
                 *                    ^looking for this
                 * Case 5. part_count does not matter, looking for [10].
                 * ranges:
                 * {1, 2}, {3, 4, ..}
                 *          ^looking for this
                 */
                /**
                 * vy_range_tree_nsearch finds the greatest range with
                 * begin == key, or the next one if an equal range was
                 * not found.
                 */
                range = vy_range_tree_nsearch(tree, key);
                if (range != NULL) {
                        /* fix curr_range for cases 2 and 3 */
                        if (range->begin != NULL &&
                            vy_stmt_compare_with_key(key, range->begin,
                                                     range->cmp_def) != 0) {
                                struct vy_range *prev;
                                prev = vy_range_tree_prev(tree, range);
                                if (prev != NULL)
                                        range = prev;
                        }
                } else {
                        /* Case 5 */
                        range = vy_range_tree_last(tree);
                }
        }
        return range;
}

struct vy_range *
vy_range_new(int64_t id, struct tuple *begin, struct tuple *end,
             const struct key_def *cmp_def)
{
        struct vy_range *range = calloc(1, sizeof(*range));
        if (range == NULL) {
                diag_set(OutOfMemory, sizeof(*range),
                         "malloc", "struct vy_range");
                return NULL;
        }
        range->id = id;
        if (begin != NULL) {
                tuple_ref(begin);
                range->begin = begin;
        }
        if (end != NULL) {
                tuple_ref(end);
                range->end = end;
        }
        range->cmp_def = cmp_def;
        rlist_create(&range->slices);
        range->heap_node.pos = UINT32_MAX;
        return range;
}

void
vy_range_delete(struct vy_range *range)
{
        if (range->begin != NULL)
                tuple_unref(range->begin);
        if (range->end != NULL)
                tuple_unref(range->end);

        struct vy_slice *slice, *next_slice;
        rlist_foreach_entry_safe(slice, &range->slices, in_range, next_slice)
                vy_slice_delete(slice);

        TRASH(range);
        free(range);
}

int
vy_range_snprint(char *buf, int size, const struct vy_range *range)
{
        int total = 0;
        SNPRINT(total, snprintf, buf, size, "(");
        if (range->begin != NULL)
                SNPRINT(total, vy_key_snprint, buf, size,
                        tuple_data(range->begin));
        else
                SNPRINT(total, snprintf, buf, size, "-inf");
        SNPRINT(total, snprintf, buf, size, "..");
        if (range->end != NULL)
                SNPRINT(total, vy_key_snprint, buf, size,
                        tuple_data(range->end));
        else
                SNPRINT(total, snprintf, buf, size, "inf");
        SNPRINT(total, snprintf, buf, size, ")");
        return total;
}

void
vy_range_add_slice(struct vy_range *range, struct vy_slice *slice)
{
        rlist_add_entry(&range->slices, slice, in_range);
        range->slice_count++;
        vy_disk_stmt_counter_add(&range->count, &slice->count);
}

void
vy_range_add_slice_before(struct vy_range *range, struct vy_slice *slice,
                          struct vy_slice *next_slice)
{
        rlist_add_tail(&next_slice->in_range, &slice->in_range);
        range->slice_count++;
        vy_disk_stmt_counter_add(&range->count, &slice->count);
}

void
vy_range_remove_slice(struct vy_range *range, struct vy_slice *slice)
{
        assert(range->slice_count > 0);
        assert(!rlist_empty(&range->slices));
        rlist_del_entry(slice, in_range);
        range->slice_count--;
        vy_disk_stmt_counter_sub(&range->count, &slice->count);
}

/**
 * To reduce write amplification caused by compaction, we follow
 * the LSM tree design. Runs in each range are divided into groups
 * called levels:
 *
 *   level 1: runs 1 .. L_1
 *   level 2: runs L_1 + 1 .. L_2
 *   ...
 *   level N: runs L_{N-1} .. L_N
 *
 * where L_N is the total number of runs, N is the total number of
 * levels, and older runs have greater numbers. Runs at each
 * subsequent level are run_size_ratio times larger than on the
 * previous one. When the number of runs at a level exceeds
 * run_count_per_level, we compact all its runs along with all runs
 * from the upper levels and in-memory indexes. Including previous
 * levels into compaction is relatively cheap, because of the level
 * size ratio.
 *
 * Given a range, this function computes the maximal level that needs
 * to be compacted and sets @compact_priority to the number of runs in
 * this level and all preceding levels.
 */
void
vy_range_update_compact_priority(struct vy_range *range,
                                 const struct index_opts *opts)
{
        assert(opts->run_count_per_level > 0);
        assert(opts->run_size_ratio > 1);

        range->compact_priority = 0;

        /* Total number of checked runs. */
        uint32_t total_run_count = 0;
        /* The total size of runs checked so far. */
        uint64_t total_size = 0;
        /* Estimated size of a compacted run, if compaction is scheduled. */
        uint64_t est_new_run_size = 0;
        /* The number of runs at the current level. */
        uint32_t level_run_count = 0;
        /*
         * The target (perfect) size of a run at the current level.
         * For the first level, it's the size of the newest run.
         * For lower levels it's computed as first level run size
         * times run_size_ratio.
         */
        uint64_t target_run_size = 0;

        struct vy_slice *slice;
        rlist_foreach_entry(slice, &range->slices, in_range) {
                uint64_t size = slice->count.bytes_compressed;
                /*
                 * The size of the first level is defined by
                 * the size of the most recent run.
                 */
                if (target_run_size == 0)
                        target_run_size = size;
                total_size += size;
                level_run_count++;
                total_run_count++;
                while (size > target_run_size) {
                        /*
                         * The run size exceeds the threshold
                         * set for the current level. Move this
                         * run down to a lower level. Switch the
                         * current level and reset the level run
                         * count.
                         */
                        level_run_count = 1;
                        /*
                         * If we have already scheduled
                         * a compaction of an upper level, and
                         * estimated compacted run will end up at
                         * this level, include the new run into
                         * this level right away to avoid
                         * a cascading compaction.
                         */
                        if (est_new_run_size > target_run_size)
                                level_run_count++;
                        /*
                         * Calculate the target run size for this
                         * level.
                         */
                        target_run_size *= opts->run_size_ratio;
                        /*
                         * Keep pushing the run down until
                         * we find an appropriate level for it.
                         */
                }
                if (level_run_count > opts->run_count_per_level) {
                        /*
                         * The number of runs at the current level
                         * exceeds the configured maximum. Arrange
                         * for compaction. We compact all runs at
                         * this level and upper levels.
                         */
                        range->compact_priority = total_run_count;
                        est_new_run_size = total_size;
                }
        }
}
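
/*
 * Worked example for vy_range_update_compact_priority (editor's
 * illustration, not part of the original source): assume
 * run_count_per_level = 2, run_size_ratio = 3.5 and four slices of
 * 100, 100, 100 and 1000 units, newest first. The three 100-unit
 * runs all land on the first level, which then holds three runs and
 * exceeds the limit of two, so compact_priority becomes 3. Their
 * estimated compacted output (300 units) still belongs to a level
 * above the 1000-unit run, so the oldest run is not pulled into the
 * compaction. A runnable model of this computation is given at the
 * end of this page.
 */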

/**
 * Return true and set split_key accordingly if the range needs to be
 * split in two.
 *
 * - We should never split a range until it was merged at least once
 *   (actually, it should be a function of run_count_per_level/number
 *   of runs used for the merge: with low run_count_per_level it's more
 *   than once, with high run_count_per_level it's once).
 * - We should use the last run size as the size of the range.
 * - We should split around the last run middle key.
 * - We should only split if the last run size is greater than
 *   4/3 * range_size.
 */
bool
vy_range_needs_split(struct vy_range *range, const struct index_opts *opts,
                     const char **p_split_key)
{
        struct vy_slice *slice;

        /* The range hasn't been merged yet - too early to split it. */
        if (range->n_compactions < 1)
                return false;

        /* Find the oldest run. */
        assert(!rlist_empty(&range->slices));
        slice = rlist_last_entry(&range->slices, struct vy_slice, in_range);

        /* The range is too small to be split. */
        if (slice->count.bytes_compressed < opts->range_size * 4 / 3)
                return false;

        /* Find the median key in the oldest run (approximately). */
        struct vy_page_info *mid_page;
        mid_page = vy_run_page_info(slice->run, slice->first_page_no +
                                    (slice->last_page_no -
                                     slice->first_page_no) / 2);

        struct vy_page_info *first_page = vy_run_page_info(slice->run,
                                                slice->first_page_no);

        /* No point in splitting if a new range is going to be empty. */
        if (key_compare(first_page->min_key, mid_page->min_key,
                        range->cmp_def) == 0)
                return false;
        /*
         * In extreme cases the median key can be < the beginning
         * of the slice, e.g.
         *
         * RUN:
         * ... |---- page N ----|-- page N + 1 --|-- page N + 2 --
         *     | min_key = [10] | min_key = [50] | min_key = [100]
         *
         * SLICE:
         * begin = [30], end = [70]
         * first_page_no = N, last_page_no = N + 1
         *
         * which makes mid_page_no = N and mid_page->min_key = [10].
         *
         * In such cases there's no point in splitting the range.
         */
        if (slice->begin != NULL && key_compare(mid_page->min_key,
                        tuple_data(slice->begin), range->cmp_def) <= 0)
                return false;
        /*
         * The median key can't be >= the end of the slice as we
         * take the min key of a page for the median key.
         */
        assert(slice->end == NULL || key_compare(mid_page->min_key,
                        tuple_data(slice->end), range->cmp_def) < 0);

        *p_split_key = mid_page->min_key;
        return true;
}
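
/*
 * Editor's note (illustrative numbers, not from the source): with
 * range_size = 1 GiB, the check above requires the oldest slice to
 * hold more than 4/3 GiB (about 1.33 GiB) of compressed data before
 * a split is considered; the split key is then the minimal key of
 * the slice's middle page.
 */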

/**
 * Check if a range should be coalesced with one or more of its
 * neighbors. If it should, return true and set @p_first and @p_last
 * to the first and last ranges to coalesce, otherwise return false.
 *
 * We coalesce ranges together when they become too small, less than
 * half the target range size, to avoid split-coalesce oscillations.
 */
bool
vy_range_needs_coalesce(struct vy_range *range, vy_range_tree_t *tree,
                        const struct index_opts *opts,
                        struct vy_range **p_first, struct vy_range **p_last)
{
        struct vy_range *it;

        /* Size of the coalesced range. */
        uint64_t total_size = range->count.bytes_compressed;
        /* Coalesce ranges until total_size > max_size. */
        uint64_t max_size = opts->range_size / 2;

        /*
         * We can't coalesce a range that was scheduled for dump
         * or compaction, because it is about to be processed by
         * a worker thread.
         */
        assert(!vy_range_is_scheduled(range));

        *p_first = *p_last = range;
        for (it = vy_range_tree_next(tree, range);
             it != NULL && !vy_range_is_scheduled(it);
             it = vy_range_tree_next(tree, it)) {
                uint64_t size = it->count.bytes_compressed;
                if (total_size + size > max_size)
                        break;
                total_size += size;
                *p_last = it;
        }
        for (it = vy_range_tree_prev(tree, range);
             it != NULL && !vy_range_is_scheduled(it);
             it = vy_range_tree_prev(tree, it)) {
                uint64_t size = it->count.bytes_compressed;
                if (total_size + size > max_size)
                        break;
                total_size += size;
                *p_first = it;
        }
        return *p_first != *p_last;
}
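
/*
 * Editor's note (illustrative numbers, not from the source): with
 * range_size = 1 GiB the coalesce budget is 512 MiB. A 200 MiB range
 * absorbs a 200 MiB neighbor (total 400 MiB), but a second 200 MiB
 * neighbor would push the total to 600 MiB, so the scan stops there
 * and only the first two ranges are coalesced on that side.
 */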

void
vy_range_iterator_open(struct vy_range_iterator *itr, vy_range_tree_t *tree,
                       enum iterator_type iterator_type,
                       const struct tuple *key)
{
        itr->tree = tree;
        itr->iterator_type = iterator_type;
        itr->key = key;
        itr->curr_range = NULL;
}

void
vy_range_iterator_next(struct vy_range_iterator *itr, struct vy_range **result)
{
        struct vy_range *curr = itr->curr_range;
        struct vy_range *next;

        if (curr == NULL) {
                /* First iteration */
                next = vy_range_tree_find_by_key(itr->tree, itr->iterator_type,
                                                 itr->key);
                goto out;
        }
        switch (itr->iterator_type) {
        case ITER_LT:
        case ITER_LE:
                next = vy_range_tree_prev(itr->tree, curr);
                break;
        case ITER_GT:
        case ITER_GE:
                next = vy_range_tree_next(itr->tree, curr);
                break;
        case ITER_EQ:
                if (curr->end != NULL &&
                    vy_stmt_compare_with_key(itr->key, curr->end,
                                             curr->cmp_def) >= 0) {
                        /* A partial key can be found in more than one range. */
                        next = vy_range_tree_next(itr->tree, curr);
                } else {
                        next = NULL;
                }
                break;
        default:
                unreachable();
        }
out:
        *result = itr->curr_range = next;
}

void
vy_range_iterator_restore(struct vy_range_iterator *itr,
                          const struct tuple *last_stmt,
                          struct vy_range **result)
{
        struct vy_range *curr = vy_range_tree_find_by_key(itr->tree,
                                itr->iterator_type,
                                last_stmt != NULL ? last_stmt : itr->key);
        *result = itr->curr_range = curr;
}
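
The compaction heuristic in vy_range_update_compact_priority is easier to follow with concrete numbers. The standalone sketch below mirrors the level-assignment loop from the file above using hypothetical slice sizes and index options (run_count_per_level = 2, run_size_ratio = 3.5, slice sizes 100/100/100/1000, newest first); it is an editor's model of the algorithm described in the comments, not code from the Tarantool tree, and it reproduces the worked example noted after that function.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        /* Hypothetical compressed slice sizes, newest first. */
        const uint64_t run_sizes[] = {100, 100, 100, 1000};
        const int run_count = sizeof(run_sizes) / sizeof(run_sizes[0]);
        /* Hypothetical index options. */
        const uint32_t run_count_per_level = 2;
        const double run_size_ratio = 3.5;

        uint32_t compact_priority = 0;
        uint32_t total_run_count = 0;
        uint64_t total_size = 0;
        uint64_t est_new_run_size = 0;
        uint32_t level_run_count = 0;
        uint64_t target_run_size = 0;

        for (int i = 0; i < run_count; i++) {
                uint64_t size = run_sizes[i];
                /* The first level is sized after the newest run. */
                if (target_run_size == 0)
                        target_run_size = size;
                total_size += size;
                level_run_count++;
                total_run_count++;
                /* Push an oversized run down to a deeper level. */
                while (size > target_run_size) {
                        level_run_count = 1;
                        if (est_new_run_size > target_run_size)
                                level_run_count++;
                        target_run_size *= run_size_ratio;
                }
                /* Too many runs on one level: schedule compaction. */
                if (level_run_count > run_count_per_level) {
                        compact_priority = total_run_count;
                        est_new_run_size = total_size;
                }
        }
        /* Prints: compact_priority = 3, est_new_run_size = 300. */
        printf("compact_priority = %" PRIu32 ", est_new_run_size = %" PRIu64 "\n",
               compact_priority, est_new_run_size);
        return 0;
}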