• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tilezen / mapbox-vector-tile / 3746140319

pending completion
3746140319

push

github

GitHub
Pre-commit tools (#121)

74 of 96 new or added lines in 6 files covered. (77.08%)

2 existing lines in 1 file now uncovered.

558 of 727 relevant lines covered (76.75%)

3.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/mapbox_vector_tile/optimise.py
1
from collections import namedtuple
×
2

NEW
3
from mapbox_vector_tile.Mapbox.vector_tile_pb2 import tile
×
4

5

NEW
6
class StringTableOptimiser:
×
7
    """
8
    Optimises the order of keys and values in the MVT layer string table.
9

10
    Counts the number of times an entry in the MVT string table (both keys
11
    and values) is used. Then reorders the string table to have the most
12
    commonly used entries first and updates the features to use the
13
    replacement locations in the table. This can save several percent in a
14
    tile with large numbers of features.
15
    """
16

17
    def __init__(self):
×
18
        self.key_counts = {}
×
19
        self.val_counts = {}
×
20

21
    def add_tags(self, feature_tags):
×
22
        itr = iter(feature_tags)
×
23
        for k, v in zip(itr, itr):
×
24
            self.key_counts[k] = self.key_counts.get(k, 0) + 1
×
25
            self.val_counts[v] = self.val_counts.get(v, 0) + 1
×
26

27
    def _update_table(self, counts, table):
×
28
        # sort string table by usage, so most commonly-used values are
29
        # assigned the smallest indices. since indices are encoded as
30
        # varints, this should make best use of the space.
NEW
31
        sort = list(sorted(((c, k) for k, c in counts.iteritems()), reverse=True))
×
32

33
        # construct the re-ordered string table
34
        new_table = []
×
35
        for _, x in sort:
×
36
            new_table.append(table[x])
×
37
        assert len(new_table) == len(table)
×
38

39
        # delete table in place and replace with the new table
40
        del table[:]
×
41
        table.extend(new_table)
×
42

43
        # construct a lookup table from the old to the new indices.
44
        new_indexes = {}
×
45
        for i, (c, k) in enumerate(sort):
×
46
            new_indexes[k] = i
×
47

48
        return new_indexes
×
49

50
    def update_string_table(self, layer):
×
51
        new_key = self._update_table(self.key_counts, layer.keys)
×
52
        new_val = self._update_table(self.val_counts, layer.values)
×
53

54
        for feature in layer.features:
×
NEW
55
            for i in range(0, len(feature.tags), 2):
×
56
                feature.tags[i] = new_key[feature.tags[i]]
×
NEW
57
                feature.tags[i + 1] = new_val[feature.tags[i + 1]]
×
58

59

60
# return the signed integer corresponding to the "zig-zag" encoded unsigned
61
# input integer. this encoding is used for MVT geometry deltas.
62
def unzigzag(n):
×
63
    return (n >> 1) ^ (-(n & 1))
×
64

65

66
# return the "zig-zag" encoded unsigned integer corresponding to the signed
67
# input integer. this encoding is used for MVT geometry deltas.
68
def zigzag(n):
×
69
    return (n << 1) ^ (n >> 31)
×
70

71

72
# we assume that every linestring consists of a single MoveTo command followed
73
# by some number of LineTo commands, and we encode this as a Line object.
74
#
75
# normally, MVT linestrings are encoded relative to the preceding linestring
76
# (if any) in the geometry. however that's awkward for reodering, so we
77
# construct an absolute MoveTo for each Line. we also derive a corresponding
78
# EndsAt location, which isn't used in the encoding, but simplifies analysis.
NEW
79
MoveTo = namedtuple("MoveTo", "x y")
×
NEW
80
EndsAt = namedtuple("EndsAt", "x y")
×
NEW
81
Line = namedtuple("Line", "moveto endsat cmds")
×
82

83

84
def _decode_lines(geom):
×
85
    """
86
    Decode a linear MVT geometry into a list of Lines.
87

88
    Each individual linestring in the MVT is extracted to a separate entry in
89
    the list of lines.
90
    """
91

92
    lines = []
×
93
    current_line = []
×
94
    current_moveto = None
×
95

96
    # to keep track of the position. we'll adapt the move-to commands to all
97
    # be relative to 0,0 at the beginning of each linestring.
98
    x = 0
×
99
    y = 0
×
100

101
    end = len(geom)
×
102
    i = 0
×
103
    while i < end:
×
104
        header = geom[i]
×
105
        cmd = header & 7
×
106
        run_length = header // 8
×
107

108
        if cmd == 1:  # move to
×
109
            # flush previous line.
110
            if current_moveto:
×
111
                lines.append(Line(current_moveto, EndsAt(x, y), current_line))
×
112
                current_line = []
×
113

114
            assert run_length == 1
×
NEW
115
            x += unzigzag(geom[i + 1])
×
NEW
116
            y += unzigzag(geom[i + 2])
×
UNCOV
117
            i += 3
×
118

119
            current_moveto = MoveTo(x, y)
×
120

121
        elif cmd == 2:  # line to
×
122
            assert current_moveto
×
123

124
            # we just copy this run, since it's encoding isn't going to change
125
            next_i = i + 1 + run_length * 2
×
126
            current_line.extend(geom[i:next_i])
×
127

128
            # but we still need to decode it to figure out where each move-to
129
            # command is in absolute space.
NEW
130
            for j in range(0, run_length):
×
131
                dx = unzigzag(geom[i + 1 + 2 * j])
×
132
                dy = unzigzag(geom[i + 2 + 2 * j])
×
133
                x += dx
×
134
                y += dy
×
135

136
            i = next_i
×
137

138
        else:
NEW
139
            raise ValueError(f"Unhandled command: {cmd}")
×
140

141
    if current_line:
×
142
        assert current_moveto
×
143
        lines.append(Line(current_moveto, EndsAt(x, y), current_line))
×
144

145
    return lines
×
146

147

148
def _reorder_lines(lines):
×
149
    """
150
    Reorder lines so that the distance from the end of one to the beginning of
151
    the next is minimised.
152
    """
153

154
    x = 0
×
155
    y = 0
×
156
    new_lines = []
×
157

158
    # treat the list of lines as a stack, off which we keep popping the best
159
    # one to add next.
160
    while lines:
×
161
        # looping over all the lines like this isn't terribly efficient, but
162
        # in local tests seems to handle a few thousand lines without a
163
        # problem.
164
        min_dist = None
×
165
        min_i = None
×
166
        for i, line in enumerate(lines):
×
167
            moveto, _, _ = line
×
168

169
            dist = abs(moveto.x - x) + abs(moveto.y - y)
×
170
            if min_dist is None or dist < min_dist:
×
171
                min_dist = dist
×
172
                min_i = i
×
173

174
        assert min_i is not None
×
175
        line = lines.pop(min_i)
×
176
        _, endsat, _ = line
×
177
        x = endsat.x
×
178
        y = endsat.y
×
179
        new_lines.append(line)
×
180

181
    return new_lines
×
182

183

184
def _rewrite_geometry(geom, new_lines):
×
185
    """
186
    Re-encode a list of Lines with absolute MoveTos as a continuous stream of
187
    MVT geometry commands, each relative to the last. Replace geom with that
188
    stream.
189
    """
190

191
    new_geom = []
×
192
    x = 0
×
193
    y = 0
×
194
    for line in new_lines:
×
195
        moveto, endsat, lineto_cmds = line
×
196

197
        dx = moveto.x - x
×
198
        dy = moveto.y - y
×
199
        x = endsat.x
×
200
        y = endsat.y
×
201

202
        new_geom.append(9)  # move to, run_length = 1
×
203
        new_geom.append(zigzag(dx))
×
204
        new_geom.append(zigzag(dy))
×
205
        new_geom.extend(lineto_cmds)
×
206

207
    # write the lines back out to geom
208
    del geom[:]
×
209
    geom.extend(new_geom)
×
210

211

212
def optimise_multilinestring(geom):
×
213
    # split the geometry into multiple lists, each starting with a move-to
214
    # command and consisting otherwise of line-to commands. (perhaps with
215
    # a close at the end? is that allowed for linestrings?)
216

217
    lines = _decode_lines(geom)
×
218

219
    # can't reorder anything unless it has multiple lines.
220
    if len(lines) > 1:
×
221
        lines = _reorder_lines(lines)
×
222
        _rewrite_geometry(geom, lines)
×
223

224

225
def optimise_tile(tile_bytes):
×
226
    """
227
    Decode a sequence of bytes as an MVT tile and reorder the string table of
228
    its layers and the order of its multilinestrings to save a few bytes.
229
    """
230

231
    t = tile()
×
232
    t.ParseFromString(tile_bytes)
×
233

234
    for layer in t.layers:
×
235
        sto = StringTableOptimiser()
×
236

237
        for feature in layer.features:
×
238
            # (multi)linestrings only
239
            if feature.type == 2:
×
240
                optimise_multilinestring(feature.geometry)
×
241

242
            sto.add_tags(feature.tags)
×
243

244
        sto.update_string_table(layer)
×
245

246
    return t.SerializeToString()
×
247

248

NEW
249
if __name__ == "__main__":
×
250
    import argparse
×
251
    import sys
×
252

253
    parser = argparse.ArgumentParser()
×
NEW
254
    parser.add_argument("input_file", help="Input MVT file", type=argparse.FileType("r"))
×
NEW
255
    parser.add_argument(
×
256
        "--output-file", help="Output file, default is stdout", type=argparse.FileType("w"), default=sys.stdout
257
    )
UNCOV
258
    args = parser.parse_args()
×
259

260
    output_bytes = optimise_tile(args.input_file.read())
×
261
    args.output_file.write(output_bytes)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc