• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zopefoundation / ZODB / 8679957491

30 Mar 2024 11:42AM CUT coverage: 83.744%. Remained the same
8679957491

push

github

dataflake
- vb [ci skip]

2878 of 4051 branches covered (71.04%)

13348 of 15939 relevant lines covered (83.74%)

0.84 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/ZODB/scripts/migrate.py
1
#!/usr/bin/env python
2
##############################################################################
3
#
4
# Copyright (c) 2001, 2002, 2003 Zope Foundation and Contributors.
5
# All Rights Reserved.
6
#
7
# This software is subject to the provisions of the Zope Public License,
8
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
9
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
10
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
11
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
12
# FOR A PARTICULAR PURPOSE
13
#
14
##############################################################################
15
"""A script to gather statistics while doing a storage migration.
×
16

17
This is very similar to a standard storage's copyTransactionsFrom() method,
18
except that it's geared to run as a script, and it collects useful pieces of
19
information as it's working.  This script can be used to stress test a storage
20
since it blasts transactions at it as fast as possible.  You can get a good
21
sense of the performance of a storage by running this script.
22

23
Actually it just counts the size of pickles in the transaction via the
24
iterator protocol, so storage overheads aren't counted.
25

26
Usage: %(PROGRAM)s [options] [source-storage-args] [destination-storage-args]
27
Options:
28
    -S sourcetype
29
    --stype=sourcetype
30
        This is the name of a recognized type for the source database.  Use -T
31
        to print out the known types.  Defaults to "FileStorage".
32

33
    -D desttype
34
    --dtype=desttype
35
        This is the name of the recognized type for the destination database.
36
        Use -T to print out the known types.  Defaults to "FileStorage".
37

38
    -o filename
39
    --output=filename
40
        Print results in filename, otherwise stdout.
41

42
    -m txncount
43
    --max=txncount
44
        Stop after committing txncount transactions.
45

46
    -k txncount
47
    --skip=txncount
48
        Skip the first txncount transactions.
49

50
    -p/--profile
51
        Turn on specialized profiling.
52

53
    -t/--timestamps
54
        Print tids as timestamps.
55

56
    -T/--storage_types
57
        Print all the recognized storage types and exit.
58

59
    -v/--verbose
60
        Turns on verbose output.  Multiple -v options increase the verbosity.
61

62
    -h/--help
63
        Print this message and exit.
64

65
Positional arguments:
66

67
    source-storage-args:
68
        Semicolon separated list of arguments for the source storage, as
69
        key=val pairs.  E.g. "file_name=Data.fs;read_only=1"
70

71
    destination-storage-args:
72
        Semicolon separated list of arguments for the destination storage, as key=val
73
        pairs.  E.g. "name=full;frequency=3600"
74
"""
75

76
import getopt
×
77
import marshal
×
78
import profile
×
79
import re
×
80
import sys
×
81
import time
×
82

83
from persistent.timestamp import TimeStamp
×
84

85
from ZODB import StorageTypes  # XXX: This import does not exist
×
86
from ZODB import utils
×
87

88

89
# Name the script was invoked as; substituted for %(PROGRAM)s when the
# module docstring is printed as the usage message.
PROGRAM = sys.argv[0]
×
90

91

92
def usage(code, msg=''):
    """Print the usage text to stderr and exit with status *code*.

    The module docstring doubles as the usage text; its ``%(name)s``
    placeholders (e.g. ``%(PROGRAM)s``) are filled in from the module
    globals.  A non-empty *msg* is printed on its own line after the
    usage text.
    """
    stream = sys.stderr
    print(__doc__ % globals(), file=stream)
    if msg:
        print(msg, file=stream)
    sys.exit(code)
×
97

98

99
def error(code, msg):
    """Report a command-line error on stderr and exit with status *code*.

    *msg* may be a string or an exception instance (it is only printed).
    Both the message and the "use --help" hint go to stderr so that
    nothing diagnostic ends up mixed into results written to stdout.
    """
    print(msg, file=sys.stderr)
    print("use --help for usage message", file=sys.stderr)
    sys.exit(code)
×
103

104

105
def _parse_storage_args(argstring):
    """Turn a ``key=val;key=val`` string into a dict of keyword arguments.

    Pairs are separated by semicolons, optionally followed by whitespace.
    Only the first ``=`` of a pair separates key from value, so a value may
    itself contain ``=``.  Empty pieces (e.g. from a trailing semicolon)
    are ignored.  A piece without ``=`` or with an empty key is reported
    through error(), which exits with status 2.
    """
    kws = {}
    for pair in re.split(r';\s*', argstring):
        if not pair:
            continue
        key, sep, val = pair.partition('=')
        if not sep or not key:
            error(2, "malformed storage argument %r (expected key=val)" % pair)
        kws[key] = val
    return kws


def _open_storage(type_name, kws):
    """Instantiate the storage registered under *type_name*.

    ``StorageTypes.storage_types`` maps a type name to a
    ``(module name, converter)`` pair.  The converter is called with the
    raw string keywords and its result is passed to the class named
    *type_name* in that module.
    """
    modname, convert = StorageTypes.storage_types[type_name]
    kw = convert(**kws)
    __import__(modname)
    storage_class = getattr(sys.modules[modname], type_name)
    return storage_class(**kw)


def main():
    """Parse the command line, open both storages and run the migration.

    Exits with status 2 (via error() or usage()) on bad options, too many
    positional arguments, malformed storage arguments, or an unknown
    storage type.  The storages and any output file opened here are closed
    again even when opening a later resource or the migration itself
    fails.
    """
    # Imported locally so the file's top-level import block is untouched.
    from contextlib import ExitStack

    try:
        opts, args = getopt.getopt(
            sys.argv[1:],
            'hvo:pm:k:D:S:Tt',
            ['help', 'verbose',
             'output=', 'profile', 'storage_types',
             'max=', 'skip=', 'dtype=', 'stype=', 'timestamps'])
    except getopt.error as msg:
        error(2, msg)

    class Options:
        # Defaults; overridden below from the parsed command line.
        stype = 'FileStorage'
        dtype = 'FileStorage'
        verbose = 0
        outfile = None
        profilep = False
        maxtxn = -1      # negative: no limit
        skiptxn = -1     # negative: skip nothing
        timestamps = False

    options = Options()

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-v', '--verbose'):
            options.verbose += 1
        elif opt in ('-T', '--storage_types'):
            # Documented behaviour: list the recognized types and exit.
            for type_name in sorted(StorageTypes.storage_types):
                print(type_name)
            sys.exit(0)
        elif opt in ('-S', '--stype'):
            options.stype = arg
        elif opt in ('-D', '--dtype'):
            options.dtype = arg
        elif opt in ('-o', '--output'):
            options.outfile = arg
        elif opt in ('-p', '--profile'):
            options.profilep = True
        elif opt in ('-m', '--max'):
            options.maxtxn = int(arg)
        elif opt in ('-k', '--skip'):
            options.skiptxn = int(arg)
        elif opt in ('-t', '--timestamps'):
            options.timestamps = True

    if len(args) > 2:
        error(2, "too many arguments")

    srckws = _parse_storage_args(args[0]) if len(args) > 0 else {}
    destkws = _parse_storage_args(args[1]) if len(args) > 1 else {}

    if options.stype not in StorageTypes.storage_types:
        usage(2, 'Source database type must be provided')
    if options.dtype not in StorageTypes.storage_types:
        usage(2, 'Destination database type must be provided')

    # Everything registered on the stack is released when the block exits,
    # including when a later open() or the migration raises.
    with ExitStack() as cleanup:
        # Open the output file
        if options.outfile is None:
            options.outfp = sys.stdout
            options.outclosep = False
        else:
            options.outfp = cleanup.enter_context(
                open(options.outfile, 'w'))
            options.outclosep = True

        if options.verbose > 0:
            print('Opening source database...')
        srcdb = _open_storage(options.stype, srckws)
        cleanup.callback(srcdb.close)

        if options.verbose > 0:
            print('Opening destination database...')
        dstdb = _open_storage(options.dtype, destkws)
        cleanup.callback(dstdb.close)

        t0 = time.time()
        doit(srcdb, dstdb, options)
        t1 = time.time()
        if options.verbose > 0:
            print('Migration time:          %8.3f' % (t1-t0))
×
209

210

211
def doit(srcdb, dstdb, options):
    """Copy transactions from *srcdb* to *dstdb*, writing statistics.

    Iterates ``srcdb.iterator()``, skips the first ``options.skiptxn``
    transactions, stops once ``options.maxtxn`` transactions have been
    committed (a negative value means no limit), and replays each
    remaining transaction into *dstdb* through tpc_begin / store /
    tpc_vote / tpc_finish.

    One row per committed transaction plus a final summary is written to
    ``options.outfp``.  The timing columns are, in seconds:

        t4-t0  whole commit of the transaction
        t1-t0  tpc_begin
        t2-t1  storing all data records
        t3-t2  tpc_vote
        t4-t3  revid bookkeeping and tpc_finish

    With ``options.profilep`` set, every 100th transaction's tpc_finish is
    run under the profiler and the stats are marshalled to
    ``profile-NN.txt`` in the current directory.
    """
    outfp = options.outfp
    profilep = options.profilep
    verbose = options.verbose
    # some global information
    largest_pickle = 0
    largest_txn_in_size = 0
    largest_txn_in_objects = 0
    total_pickle_size = 0
    total_object_count = 0
    # Ripped from BaseStorage.copyTransactionsFrom()
    ts = None
    ok = True
    prevrevids = {}
    counter = 0
    skipper = 0

    column_titles = ("OBJS", "BYTES",
                     "t4-t0", "t1-t0", "t2-t1", "t3-t2", "t4-t3")
    if options.timestamps:
        header = "%4s. %26s %6s %8s %5s %5s %5s %5s %5s" % (
            ("NUM", "TID AS TIMESTAMP") + column_titles)
        row_format = "%4d. %26s %6d %8d %5.3f %5.3f %5.3f %5.3f %5.3f"
    else:
        header = "%4s. %20s %6s %8s %6s %6s %6s %6s %6s" % (
            ("NUM", "TRANSACTION ID") + column_titles)
        row_format = "%4d. %20s %6d %8d %6.4f %6.4f %6.4f %6.4f %6.4f"
    # The header belongs with the rows, so it goes to the results file too.
    print(header, file=outfp)

    for txn in srcdb.iterator():
        skipper += 1
        if skipper <= options.skiptxn:
            continue
        counter += 1
        if counter > options.maxtxn >= 0:
            break
        tid = txn.tid
        if ts is None:
            ts = TimeStamp(tid)
        else:
            t = TimeStamp(tid)
            if t <= ts:
                # Not later than the previous transaction: warn once per
                # out-of-order run and substitute a strictly later tid.
                if ok:
                    print('Time stamps are out of order {}, {}'.format(ts, t),
                          file=sys.stderr)
                    ok = False
                ts = t.laterThan(ts)
                tid = ts.raw()
            else:
                ts = t
                if not ok:
                    print('Time stamps are back in order %s' % t,
                          file=sys.stderr)
                    ok = True
        if verbose > 1:
            print(ts)

        prof = None
        if profilep and (counter % 100) == 0:
            prof = profile.Profile()
        objects = 0
        size = 0
        newrevids = RevidAccumulator()
        t0 = time.time()
        dstdb.tpc_begin(txn, tid, txn.status)
        t1 = time.time()
        for r in txn:
            oid = r.oid
            objects += 1
            thissize = len(r.data)
            size += thissize
            if thissize > largest_pickle:
                largest_pickle = thissize
            if verbose > 1:
                if not r.version:
                    vstr = 'norev'
                else:
                    vstr = r.version
                print(utils.U64(oid), vstr, len(r.data))
            # Revision this object had after its previous copied
            # transaction, or z64 if it has not been seen yet.
            oldrevid = prevrevids.get(oid, utils.z64)
            result = dstdb.store(oid, oldrevid, r.data, r.version, txn)
            newrevids.store(oid, result)
        t2 = time.time()
        result = dstdb.tpc_vote(txn)
        t3 = time.time()
        newrevids.tpc_vote(result)
        prevrevids.update(newrevids.get_dict())
        # Profile every 100 transactions
        if prof is not None:
            prof.runcall(dstdb.tpc_finish, txn)
        else:
            dstdb.tpc_finish(txn)
        t4 = time.time()

        # record the results
        if objects > largest_txn_in_objects:
            largest_txn_in_objects = objects
        if size > largest_txn_in_size:
            largest_txn_in_size = size
        if options.timestamps:
            tidstr = str(TimeStamp(tid))
        else:
            tidstr = utils.U64(tid)
        print(row_format % (skipper, tidstr, objects, size,
                            t4-t0, t1-t0, t2-t1, t3-t2, t4-t3), file=outfp)
        total_pickle_size += size
        total_object_count += objects

        if prof is not None:
            prof.create_stats()
            # Integer division keeps the file number an int.
            with open('profile-%02d.txt' % (counter // 100), 'wb') as fp:
                marshal.dump(prof.stats, fp)
    print("Largest pickle:          %8d" % largest_pickle, file=outfp)
    print("Largest transaction:     %8d" % largest_txn_in_size, file=outfp)
    print("Largest object count:    %8d" % largest_txn_in_objects, file=outfp)
    print("Total pickle size: %14d" % total_pickle_size, file=outfp)
    print("Total object count:      %8d" % total_object_count, file=outfp)
×
328

329

330
# helper to deal with differences between old-style store() return and
331
# new-style store() return that supports ZEO
332

333
class RevidAccumulator:
    """Collect the new revision id (serial) of every object in a transaction.

    A storage may report serials directly from store() as a single value,
    or as ``(oid, serial)`` pairs returned from store() or tpc_vote().
    In the pair form a serial slot may hold an exception instance instead,
    which is re-raised here.
    """

    # Serials are byte strings on Python 3; str is still accepted so that
    # anything the previous str-only check let through keeps working.
    _serial_types = (bytes, str)

    def __init__(self):
        # Maps oid -> serial for the current transaction.
        self.data = {}

    def _update_from_list(self, pairs):
        """Record every ``(oid, serial)`` pair in *pairs*.

        Raises the exception found in a serial slot, or TypeError if the
        slot holds something that is neither a serial nor an exception.
        """
        for oid, serial in pairs:
            if isinstance(serial, self._serial_types):
                self.data[oid] = serial
            elif isinstance(serial, BaseException):
                raise serial
            else:
                raise TypeError(
                    'unexpected serial %r for oid %r' % (serial, oid))

    def store(self, oid, result):
        """Record the *result* of a storage ``store()`` call for *oid*.

        *result* may be a serial, an iterable of ``(oid, serial)`` pairs,
        or None (nothing to record yet).
        """
        if isinstance(result, self._serial_types):
            self.data[oid] = result
        elif result is not None:
            self._update_from_list(result)

    def tpc_vote(self, result):
        """Record the ``(oid, serial)`` pairs returned by ``tpc_vote()``.

        A *result* of None is ignored.
        """
        if result is not None:
            self._update_from_list(result)

    def get_dict(self):
        """Return the accumulated oid -> serial mapping (not a copy)."""
        return self.data
×
356

357

358
if __name__ == '__main__':
    # Run the migration only when executed as a script, not on import.
    main()
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc