• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tilezen / mapbox-vector-tile / 3888698850

pending completion
3888698850

push

github

GitHub
Coordinates conversion (#127)

117 of 119 new or added lines in 5 files covered. (98.32%)

1 existing line in 1 file now uncovered.

579 of 730 relevant lines covered (79.32%)

3.17 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/mapbox_vector_tile/optimise.py
1
from collections import namedtuple
×
2

NEW
3
from mapbox_vector_tile.Mapbox import vector_tile_pb2 as vector_tile
×
4
from mapbox_vector_tile.utils import CMD_LINE_TO, CMD_MOVE_TO, zig_zag_decode, zig_zag_encode
×
5

6

7
class StringTableOptimiser:
×
8
    """
9
    Optimizes the order of keys and values in the MVT layer string table.
10

11
    Counts the number of times an entry in the MVT string table (both keys and values) is used. Then reorders the
12
    string table to have the most commonly used entries first and updates the features to use the replacement
13
    locations in the table. This can save several percent in a tile with large numbers of features.
14
    """
15

16
    def __init__(self):
×
17
        self.key_counts = {}
×
18
        self.val_counts = {}
×
19

20
    def add_tags(self, feature_tags):
×
21
        itr = iter(feature_tags)
×
22
        for k, v in zip(itr, itr):
×
23
            self.key_counts[k] = self.key_counts.get(k, 0) + 1
×
24
            self.val_counts[v] = self.val_counts.get(v, 0) + 1
×
25

26
    @staticmethod
×
UNCOV
27
    def _update_table(counts, table):
×
28
        # Sort string table by usage, so most commonly-used values are
29
        # assigned the smallest indices. Since indices are encoded as
30
        # varints, this should make best use of the space.
31
        sort = list(sorted(((c, k) for k, c in counts.items()), reverse=True))
×
32

33
        # construct the re-ordered string table
34
        new_table = []
×
35
        for _, x in sort:
×
36
            new_table.append(table[x])
×
37
        assert len(new_table) == len(table)
×
38

39
        # delete table in place and replace with the new table
40
        del table[:]
×
41
        table.extend(new_table)
×
42

43
        # construct a lookup table from the old to the new indices.
44
        new_indexes = {}
×
45
        for i, (c, k) in enumerate(sort):
×
46
            new_indexes[k] = i
×
47

48
        return new_indexes
×
49

50
    def update_string_table(self, layer):
×
51
        new_key = self._update_table(self.key_counts, layer.keys)
×
52
        new_val = self._update_table(self.val_counts, layer.values)
×
53

54
        for feature in layer.features:
×
55
            for i in range(0, len(feature.tags), 2):
×
56
                feature.tags[i] = new_key[feature.tags[i]]
×
57
                feature.tags[i + 1] = new_val[feature.tags[i + 1]]
×
58

59

60
# We assume that every linestring consists of a single MoveTo command followed by some number of LineTo commands,
61
# and we encode this as a Line object.
62
#
63
# Normally, MVT linestrings are encoded relative to the preceding linestring (if any) in the geometry. However,
64
# that's awkward for reordering, so we construct an absolute MoveTo for each Line. We also derive a corresponding
65
# EndsAt location, which isn't used in the encoding, but simplifies analysis.
66
MoveTo = namedtuple("MoveTo", "x y")
×
67
EndsAt = namedtuple("EndsAt", "x y")
×
68
Line = namedtuple("Line", "moveto endsat cmds")
×
69

70

71
def _decode_lines(geom):
×
72
    """
73
    Decode a linear MVT geometry into a list of Lines.
74

75
    Each individual linestring in the MVT is extracted to a separate entry in the list of lines.
76
    """
77

78
    lines = []
×
79
    current_line = []
×
80
    current_moveto = None
×
81

82
    # To keep track of the position. We'll adapt the move-to commands to all be relative to 0,0 at the beginning of
83
    # each linestring.
84
    x = 0
×
85
    y = 0
×
86

87
    end = len(geom)
×
88
    i = 0
×
89
    while i < end:
×
90
        header = geom[i]
×
91
        cmd = header & 7
×
92
        run_length = header // 8
×
93

94
        if cmd == CMD_MOVE_TO:
×
95
            # flush previous line.
96
            if current_moveto:
×
97
                lines.append(Line(current_moveto, EndsAt(x, y), current_line))
×
98
                current_line = []
×
99

100
            assert run_length == 1
×
101
            x += zig_zag_decode(geom[i + 1])
×
102
            y += zig_zag_decode(geom[i + 2])
×
103
            i += 3
×
104

105
            current_moveto = MoveTo(x, y)
×
106

107
        elif cmd == CMD_LINE_TO:
×
108
            assert current_moveto
×
109

110
            # we just copy this run, since it's encoding isn't going to change
111
            next_i = i + 1 + run_length * 2
×
112
            current_line.extend(geom[i:next_i])
×
113

114
            # but we still need to decode it to figure out where each move-to command is in absolute space.
115
            for j in range(0, run_length):
×
116
                dx = zig_zag_decode(geom[i + 1 + 2 * j])
×
117
                dy = zig_zag_decode(geom[i + 2 + 2 * j])
×
118
                x += dx
×
119
                y += dy
×
120

121
            i = next_i
×
122

123
        else:
124
            raise ValueError(f"Unhandled command: {cmd}")
×
125

126
    if current_line:
×
127
        assert current_moveto
×
128
        lines.append(Line(current_moveto, EndsAt(x, y), current_line))
×
129

130
    return lines
×
131

132

133
def _reorder_lines(lines):
×
134
    """
135
    Reorder lines so that the distance from the end of one to the beginning of the next is minimized.
136
    """
137

138
    x = 0
×
139
    y = 0
×
140
    new_lines = []
×
141

142
    # treat the list of lines as a stack, off which we keep popping the best one to add next.
143
    while lines:
×
144
        # looping over all the lines like this isn't terribly efficient, but in local tests seems to handle a few
145
        # thousand lines without a problem.
146
        min_dist = None
×
147
        min_i = None
×
148
        for i, line in enumerate(lines):
×
149
            moveto, _, _ = line
×
150

151
            dist = abs(moveto.x - x) + abs(moveto.y - y)
×
152
            if min_dist is None or dist < min_dist:
×
153
                min_dist = dist
×
154
                min_i = i
×
155

156
        assert min_i is not None
×
157
        line = lines.pop(min_i)
×
158
        _, endsat, _ = line
×
159
        x = endsat.x
×
160
        y = endsat.y
×
161
        new_lines.append(line)
×
162

163
    return new_lines
×
164

165

166
def _rewrite_geometry(geom, new_lines):
×
167
    """
168
    Re-encode a list of Lines with absolute MoveTos as a continuous stream of MVT geometry commands, each relative to
169
    the last. Replace geom with that stream.
170
    """
171

172
    new_geom = []
×
173
    x = 0
×
174
    y = 0
×
175
    for line in new_lines:
×
176
        moveto, endsat, lineto_cmds = line
×
177

178
        dx = moveto.x - x
×
179
        dy = moveto.y - y
×
180
        x = endsat.x
×
181
        y = endsat.y
×
182

183
        new_geom.append(9)  # move to, run_length = 1
×
184
        new_geom.append(zig_zag_encode(dx))
×
185
        new_geom.append(zig_zag_encode(dy))
×
186
        new_geom.extend(lineto_cmds)
×
187

188
    # write the lines back out to geom
189
    del geom[:]
×
190
    geom.extend(new_geom)
×
191

192

193
def optimise_multilinestring(geom):
×
194
    # Split the geometry into multiple lists, each starting with a move-to command and consisting otherwise of
195
    # line-to commands. (perhaps with a close at the end? Is that allowed for linestrings?)
196

197
    lines = _decode_lines(geom)
×
198

199
    # can't reorder anything unless it has multiple lines.
200
    if len(lines) > 1:
×
201
        lines = _reorder_lines(lines)
×
202
        _rewrite_geometry(geom, lines)
×
203

204

205
def optimise_tile(tile_bytes):
×
206
    """
207
    Decode a sequence of bytes as an MVT tile and reorder the string table of its layers and the order of its
208
    multilinestrings to save a few bytes.
209
    """
210

NEW
211
    t = vector_tile.tile()
×
212
    t.ParseFromString(tile_bytes)
×
213

214
    for layer in t.layers:
×
215
        sto = StringTableOptimiser()
×
216

217
        for feature in layer.features:
×
218
            # (multi)linestrings only
219
            if feature.type == 2:
×
220
                optimise_multilinestring(feature.geometry)
×
221

222
            sto.add_tags(feature.tags)
×
223

224
        sto.update_string_table(layer)
×
225

226
    return t.SerializeToString()
×
227

228

229
if __name__ == "__main__":
×
230
    import argparse
×
231
    import sys
×
232

233
    parser = argparse.ArgumentParser()
×
234
    parser.add_argument("input_file", help="Input MVT file", type=argparse.FileType("r"))
×
235
    parser.add_argument(
×
236
        "--output-file", help="Output file, default is stdout", type=argparse.FileType("w"), default=sys.stdout
237
    )
238
    args = parser.parse_args()
×
239

240
    output_bytes = optimise_tile(args.input_file.read())
×
241
    args.output_file.write(output_bytes)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc