• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pronovic / cedar-backup3 / 17814549622

18 Sep 2025 12:46AM UTC coverage: 73.311%. Remained the same
17814549622

Pull #56

github

web-flow
Merge 1968f9fca into 50daef4a0
Pull Request #56: Start addressing Ruff linter warnings

28 of 31 new or added lines in 20 files covered. (90.32%)

1 existing line in 1 file now uncovered.

7922 of 10806 relevant lines covered (73.31%)

2.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.62
/src/CedarBackup3/extend/split.py
1
# -*- coding: utf-8 -*-
2
# vim: set ft=python ts=4 sw=4 expandtab:
3
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
4
#
5
#              C E D A R
6
#          S O L U T I O N S       "Software done right."
7
#           S O F T W A R E
8
#
9
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
10
#
11
# Copyright (c) 2007,2010,2013,2015 Kenneth J. Pronovici.
12
# All rights reserved.
13
#
14
# This program is free software; you can redistribute it and/or
15
# modify it under the terms of the GNU General Public License,
16
# Version 2, as published by the Free Software Foundation.
17
#
18
# This program is distributed in the hope that it will be useful,
19
# but WITHOUT ANY WARRANTY; without even the implied warranty of
20
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
21
#
22
# Copies of the GNU General Public License are available from
23
# the Free Software Foundation website, http://www.gnu.org/.
24
#
25
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
26
#
27
# Author   : Kenneth J. Pronovici <pronovic@ieee.org>
28
# Language : Python 3
29
# Project  : Official Cedar Backup Extensions
30
# Purpose  : Provides an extension to split up large files in staging directories.
31
#
32
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
33

34
########################################################################
35
# Module documentation
36
########################################################################
37

38
"""
39
Provides an extension to split up large files in staging directories.
40

41
When this extension is executed, it will look through the configured Cedar
42
Backup staging directory for files exceeding a specified size limit, and split
43
them down into smaller files using the 'split' utility.  Any directory which
44
has already been split (as indicated by the ``cback.split`` file) will be
45
ignored.
46

47
This extension requires a new configuration section <split> and is intended
48
to be run immediately after the standard stage action or immediately before the
49
standard store action.  Aside from its own configuration, it requires the
50
options and staging configuration sections in the standard Cedar Backup
51
configuration file.
52

53
:author: Kenneth J. Pronovici <pronovic@ieee.org>
54
"""
55

56
########################################################################
57
# Imported modules
58
########################################################################
59

60
import logging
4✔
61
import os
4✔
62
import re
4✔
63
from functools import total_ordering
4✔
64

65
from CedarBackup3.actions.util import findDailyDirs, getBackupFiles, writeIndicatorFile
4✔
66
from CedarBackup3.config import ByteQuantity, addByteQuantityNode, readByteQuantity
4✔
67
from CedarBackup3.util import changeOwnership, executeCommand, resolveCommand
4✔
68
from CedarBackup3.xmlutil import addContainerNode, createInputDom, readFirstChild
4✔
69

70
########################################################################
71
# Module-wide constants and variables
72
########################################################################
73

74
# Module-level logger used by every function in this extension.
logger = logging.getLogger("CedarBackup3.log.extend.split")
4✔
75

76
# Command (as an argument list) that _splitFile() resolves and executes.
SPLIT_COMMAND = ["split"]
4✔
77
# Name of the indicator file that executeAction() writes into each processed daily directory.
SPLIT_INDICATOR = "cback.split"
4✔
78

79

80
########################################################################
81
# SplitConfig class definition
82
########################################################################
83

84

85
@total_ordering
class SplitConfig(object):
    """
    Class representing split configuration.

    Split configuration is used for splitting staging directories.

    The following restrictions exist on data in this class:

       - The size limit must be a ByteQuantity
       - The split size must be a ByteQuantity

    Instances may be compared with each other and with ``None`` (an instance
    always sorts after ``None``).  Comparing with any other type returns
    ``NotImplemented`` so that Python applies its normal fallback rules rather
    than failing with an ``AttributeError``.
    """

    def __init__(self, sizeLimit=None, splitSize=None):
        """
        Constructor for the ``SplitConfig`` class.

        Args:
           sizeLimit: Size limit of the files, as a ``ByteQuantity``
           splitSize: Size that files exceeding the limit will be split into, as a ``ByteQuantity``

        Raises:
           ValueError: If one of the values is invalid
        """
        self._sizeLimit = None
        self._splitSize = None
        # Assign through the properties so that validation is applied.
        self.sizeLimit = sizeLimit
        self.splitSize = splitSize

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "SplitConfig(%s, %s)" % (self.sizeLimit, self.splitSize)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    def __eq__(self, other):
        """Equals operator, implemented in terms of original Python 2 compare operator."""
        if not SplitConfig._isComparable(other):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __lt__(self, other):
        """Less-than operator, implemented in terms of original Python 2 compare operator."""
        if not SplitConfig._isComparable(other):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __gt__(self, other):
        """Greater-than operator, implemented in terms of original Python 2 compare operator."""
        if not SplitConfig._isComparable(other):
            return NotImplemented
        return self.__cmp__(other) > 0

    @staticmethod
    def _isComparable(other):
        """Returns ``True`` if ``other`` is ``None`` or another ``SplitConfig``."""
        return other is None or isinstance(other, SplitConfig)

    def __cmp__(self, other):
        """
        Original Python 2 comparison operator.

        The size limit is compared first and the split size second.  A field
        that is ``None`` is compared as if it were an empty ``ByteQuantity()``.

        Args:
           other: Other object to compare to

        Returns:
            -1/0/1 depending on whether self is ``<``, ``=`` or ``>`` other
        """
        if other is None:
            return 1
        fieldPairs = (
            (self.sizeLimit, other.sizeLimit),
            (self.splitSize, other.splitSize),
        )
        for mine, theirs in fieldPairs:
            if mine != theirs:
                return -1 if (mine or ByteQuantity()) < (theirs or ByteQuantity()) else 1
        return 0

    def _setSizeLimit(self, value):
        """
        Property target used to set the size limit.
        If not ``None``, the value must be a ``ByteQuantity`` object.

        Raises:
           ValueError: If the value is not a ``ByteQuantity``
        """
        if value is not None and not isinstance(value, ByteQuantity):
            raise ValueError("Value must be a ``ByteQuantity`` object.")
        self._sizeLimit = value

    def _getSizeLimit(self):
        """
        Property target used to get the size limit.
        """
        return self._sizeLimit

    def _setSplitSize(self, value):
        """
        Property target used to set the split size.
        If not ``None``, the value must be a ``ByteQuantity`` object.

        Raises:
           ValueError: If the value is not a ``ByteQuantity``
        """
        if value is not None and not isinstance(value, ByteQuantity):
            raise ValueError("Value must be a ``ByteQuantity`` object.")
        self._splitSize = value

    def _getSplitSize(self):
        """
        Property target used to get the split size.
        """
        return self._splitSize

    sizeLimit = property(_getSizeLimit, _setSizeLimit, None, doc="Size limit, as a ByteQuantity")
    splitSize = property(_getSplitSize, _setSplitSize, None, doc="Split size, as a ByteQuantity")
4✔
204

205

206
########################################################################
207
# LocalConfig class definition
208
########################################################################
209

210

211
@total_ordering
class LocalConfig(object):
    """
    Class representing this extension's configuration document.

    This is not a general-purpose configuration object like the main Cedar
    Backup configuration object.  Instead, it just knows how to parse and emit
    split-specific configuration values.  Third parties who need to read and
    write configuration related to this extension should access it through the
    constructor, ``validate`` and ``addConfig`` methods.

    Instances may be compared with each other and with ``None``.  Comparing
    with any other type returns ``NotImplemented`` so that Python applies its
    normal fallback rules rather than failing with an ``AttributeError``.

    *Note:* Lists within this class are "unordered" for equality comparisons.
    """

    def __init__(self, xmlData=None, xmlPath=None, validate=True):
        """
        Initializes a configuration object.

        If you initialize the object without passing either ``xmlData`` or
        ``xmlPath`` then configuration will be empty and will be invalid until it
        is filled in properly.

        No reference to the original XML data or original path is saved off by
        this class.  Once the data has been parsed (successfully or not) this
        original information is discarded.

        Unless the ``validate`` argument is ``False``, the :any:`LocalConfig.validate`
        method will be called (with its default arguments) against configuration
        after successfully parsing any passed-in XML.  Keep in mind that even if
        ``validate`` is ``False``, it might not be possible to parse the passed-in
        XML document if lower-level validations fail.

        *Note:* It is strongly suggested that the ``validate`` option always be set
        to ``True`` (the default) unless there is a specific need to read in
        invalid configuration from disk.

        Args:
           xmlData (String data): XML data representing configuration
           xmlPath (Absolute path to a file on disk): Path to an XML file on disk
           validate (Boolean true/false): Validate the document after parsing it

        Raises:
           ValueError: If both ``xmlData`` and ``xmlPath`` are passed-in
           ValueError: If the XML data in ``xmlData`` or ``xmlPath`` cannot be parsed
           ValueError: If the parsed configuration document is not valid
        """
        self._split = None
        if xmlData is not None and xmlPath is not None:
            raise ValueError("Use either xmlData or xmlPath, but not both.")
        if xmlPath is not None:
            # Load the document from disk, then fall through to the shared parse step.
            with open(xmlPath) as f:
                xmlData = f.read()
        if xmlData is not None:
            self._parseXmlData(xmlData)
            if validate:
                self.validate()

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "LocalConfig(%s)" % (self.split)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    def __eq__(self, other):
        """Equals operator, implemented in terms of original Python 2 compare operator."""
        if not LocalConfig._isComparable(other):
            return NotImplemented
        return self.__cmp__(other) == 0

    def __lt__(self, other):
        """Less-than operator, implemented in terms of original Python 2 compare operator."""
        if not LocalConfig._isComparable(other):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __gt__(self, other):
        """Greater-than operator, implemented in terms of original Python 2 compare operator."""
        if not LocalConfig._isComparable(other):
            return NotImplemented
        return self.__cmp__(other) > 0

    @staticmethod
    def _isComparable(other):
        """Returns ``True`` if ``other`` is ``None`` or another ``LocalConfig``."""
        return other is None or isinstance(other, LocalConfig)

    def __cmp__(self, other):
        """
        Original Python 2 comparison operator.
        Lists within this class are "unordered" for equality comparisons.

        Args:
           other: Other object to compare to

        Returns:
            -1/0/1 depending on whether self is ``<``, ``=`` or ``>`` other
        """
        if other is None:
            return 1
        if self.split != other.split:
            return -1 if self.split < other.split else 1
        return 0

    def _setSplit(self, value):
        """
        Property target used to set the split configuration value.
        If not ``None``, the value must be a ``SplitConfig`` object.

        Raises:
           ValueError: If the value is not a ``SplitConfig``
        """
        if value is not None and not isinstance(value, SplitConfig):
            raise ValueError("Value must be a ``SplitConfig`` object.")
        self._split = value

    def _getSplit(self):
        """
        Property target used to get the split configuration value.
        """
        return self._split

    split = property(_getSplit, _setSplit, None, "Split configuration in terms of a ``SplitConfig`` object.")

    def validate(self):
        """
        Validates configuration represented by the object.

        Split configuration must be filled in.  Within that, both the size limit
        and split size must be filled in.

        Raises:
           ValueError: If one of the validations fails
        """
        if self.split is None:
            raise ValueError("Split section is required.")
        if self.split.sizeLimit is None:
            raise ValueError("Size limit must be set.")
        if self.split.splitSize is None:
            raise ValueError("Split size must be set.")

    def addConfig(self, xmlDom, parentNode):
        """
        Adds a <split> configuration section as the next child of a parent.

        Third parties should use this function to write configuration related to
        this extension.  Nothing is added when no split configuration is set.

        We add the following fields to the document::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        Args:
           xmlDom: DOM tree as from ``impl.createDocument()``
           parentNode: Parent that the section should be appended to
        """
        if self.split is not None:
            sectionNode = addContainerNode(xmlDom, parentNode, "split")
            addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit)
            addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize)

    def _parseXmlData(self, xmlData):
        """
        Internal method to parse an XML string into the object.

        This method parses the XML document into a DOM tree and then calls a
        static method to parse the split configuration section.

        Args:
           xmlData (String data): XML data to be parsed

        Raises:
           ValueError: If the XML cannot be successfully parsed
        """
        # Only the parent node is needed; the DOM object itself is not used here.
        (_, parentNode) = createInputDom(xmlData)
        self._split = LocalConfig._parseSplit(parentNode)

    @staticmethod
    def _parseSplit(parent):
        """
        Parses a split configuration section.

        We read the following individual fields::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        Args:
           parent: Parent node to search beneath

        Returns:
            ``SplitConfig`` object or ``None`` if the section does not exist

        Raises:
           ValueError: If some filled-in value is invalid
        """
        section = readFirstChild(parent, "split")
        if section is None:
            return None
        split = SplitConfig()
        split.sizeLimit = readByteQuantity(section, "size_limit")
        split.splitSize = readByteQuantity(section, "split_size")
        return split
4✔
414

415

416
########################################################################
417
# Public functions
418
########################################################################
419

420
###########################
421
# executeAction() function
422
###########################
423

424

425
def executeAction(configPath, options, config):  # noqa: ARG001
    """
    Executes the split backup action.

    The extension's own configuration is loaded from ``configPath``.  Every
    daily staging directory returned by ``findDailyDirs`` is split and then
    marked with the split indicator file.

    Args:
       configPath (String representing a path on disk): Path to configuration file on disk
       options (Options object): Program command-line options (not used by this action)
       config (Config object): Program configuration

    Raises:
       ValueError: If the options or stage section of ``config`` is missing
       ValueError: Under many generic error conditions
       IOError: If there are I/O problems reading or writing files
    """
    logger.debug("Executing split extended action.")
    sectionMissing = config.options is None or config.stage is None
    if sectionMissing:
        raise ValueError("Cedar Backup configuration is not properly filled in.")
    localConfig = LocalConfig(xmlPath=configPath)
    for stagingDir in findDailyDirs(config.stage.targetDir, SPLIT_INDICATOR):
        _splitDailyDir(
            stagingDir,
            localConfig.split.sizeLimit,
            localConfig.split.splitSize,
            config.options.backupUser,
            config.options.backupGroup,
        )
        # Mark the directory only after its contents were split without error.
        writeIndicatorFile(stagingDir, SPLIT_INDICATOR, config.options.backupUser, config.options.backupGroup)
    logger.info("Executed the split extended action successfully.")
×
448

449

450
##############################
451
# _splitDailyDir() function
452
##############################
453

454

455
def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
    """
    Splits large files in a daily staging directory.

    The candidate files come from ``getBackupFiles``, which leaves out
    indicator files.  Each candidate whose size on disk is greater than
    ``sizeLimit`` is split via ``_splitFile`` and the original is removed.

    Args:
       dailyDir: Daily directory to split
       sizeLimit: Size limit that triggers a split
       splitSize: Size of the pieces that oversized files are split into
       backupUser: User that target files should be owned by
       backupGroup: Group that target files should be owned by

    Raises:
       ValueError: If a file to be split does not exist
       IOError: If there is a problem splitting or removing a file
    """
    logger.debug("Begin splitting contents of [%s].", dailyDir)
    for candidate in getBackupFiles(dailyDir):  # ignores indicator files
        candidateSize = float(os.stat(candidate).st_size)
        if candidateSize > sizeLimit:
            _splitFile(candidate, splitSize, backupUser, backupGroup, removeSource=True)
    logger.debug("Completed splitting contents of [%s].", dailyDir)
×
481

482

483
########################
484
# _splitFile() function
485
########################
486

487

488
def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
    """
    Splits the source file into chunks of the indicated size.

    The chunks are written next to the source file and are named
    ``<filename>_NNNNN`` with a five-digit numeric suffix starting at zero.
    The split files will be owned by the indicated backup user and group.  If
    ``removeSource`` is ``True``, then the source file will be removed after it is
    successfully split.

    The working directory is changed while the ``split`` command runs and is
    always restored before returning.

    Args:
       sourcePath: Absolute path of the source file to split
       splitSize: Chunk size, as an object whose ``bytes`` attribute gives the size in bytes
       backupUser: User that target files should be owned by
       backupGroup: Group that target files should be owned by
       removeSource: Indicates whether to remove the source file

    Raises:
       ValueError: If the source path does not exist
       IOError: If there is a problem accessing, splitting or removing the source file
    """
    cwd = os.getcwd()
    try:
        if not os.path.exists(sourcePath):
            raise ValueError("Source path [%s] does not exist." % sourcePath)
        dirname = os.path.dirname(sourcePath)
        filename = os.path.basename(sourcePath)
        prefix = "%s_" % filename
        chunkBytes = int(splitSize.bytes)
        os.chdir(dirname)  # need to operate from directory that we want files written to
        command = resolveCommand(SPLIT_COMMAND)
        args = ["--verbose", "--numeric-suffixes", "--suffix-length=5", "--bytes=%d" % chunkBytes, filename, prefix]
        (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False)
        if result != 0:
            raise IOError("Error [%d] calling split for [%s]." % (result, sourcePath))
        if not output:
            # Without verbose output there is no way to tell which files were created.
            raise IOError("Unable to parse output from split command.")
        # The prefix is derived from the filename, so escape it: a name containing
        # regex metacharacters would otherwise corrupt (or fail to compile) the pattern.
        pattern = re.compile(r"(creating file [`'])(%s)(.*)(')" % re.escape(prefix))
        match = pattern.search(output[-1])
        if match is None:
            raise IOError("Unable to parse output from split command.")
        # The last line of output names the final chunk, so its suffix is the highest
        # index that exists; the range must include it.
        lastIndex = int(match.group(3).strip())
        for index in range(lastIndex + 1):
            path = "%s%05d" % (prefix, index)
            if not os.path.exists(path):
                raise IOError("After call to split, expected file [%s] does not exist." % path)
            changeOwnership(path, backupUser, backupGroup)
        if removeSource and os.path.exists(sourcePath):
            try:
                os.remove(sourcePath)
            except OSError as e:
                raise IOError("Failed to remove file [%s] after splitting it." % (sourcePath)) from e
            logger.debug("Completed removing old file [%s].", sourcePath)
    finally:
        os.chdir(cwd)
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc