• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pronovic / cedar-backup3 / 17816721342

18 Sep 2025 03:05AM UTC coverage: 73.39% (+0.08%) from 73.311%
17816721342

Pull #56

github

web-flow
Merge 3dfb9e71d into 50daef4a0
Pull Request #56: Address all Ruff linter errors, either with code changes or exclusions

139 of 277 new or added lines in 29 files covered. (50.18%)

2 existing lines in 2 files now uncovered.

7907 of 10774 relevant lines covered (73.39%)

2.93 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.62
/src/CedarBackup3/extend/split.py
1
# vim: set ft=python ts=4 sw=4 expandtab:
2
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
3
#
4
#              C E D A R
5
#          S O L U T I O N S       "Software done right."
6
#           S O F T W A R E
7
#
8
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
9
#
10
# Copyright (c) 2007,2010,2013,2015 Kenneth J. Pronovici.
11
# All rights reserved.
12
#
13
# This program is free software; you can redistribute it and/or
14
# modify it under the terms of the GNU General Public License,
15
# Version 2, as published by the Free Software Foundation.
16
#
17
# This program is distributed in the hope that it will be useful,
18
# but WITHOUT ANY WARRANTY; without even the implied warranty of
19
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
20
#
21
# Copies of the GNU General Public License are available from
22
# the Free Software Foundation website, http://www.gnu.org/.
23
#
24
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
25
#
26
# Author   : Kenneth J. Pronovici <pronovic@ieee.org>
27
# Language : Python 3
28
# Project  : Official Cedar Backup Extensions
29
# Purpose  : Provides an extension to split up large files in staging directories.
30
#
31
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
32

33
########################################################################
34
# Module documentation
35
########################################################################
36

37
"""
38
Provides an extension to split up large files in staging directories.
39

40
When this extension is executed, it will look through the configured Cedar
41
Backup staging directory for files exceeding a specified size limit, and split
42
them down into smaller files using the 'split' utility.  Any directory which
43
has already been split (as indicated by the ``cback.split`` file) will be
44
ignored.
45

46
This extension requires a new configuration section <split> and is intended
47
to be run immediately after the standard stage action or immediately before the
48
standard store action.  Aside from its own configuration, it requires the
49
options and staging configuration sections in the standard Cedar Backup
50
configuration file.
51

52
:author: Kenneth J. Pronovici <pronovic@ieee.org>
53
"""
54

55
########################################################################
56
# Imported modules
57
########################################################################
58

59
import logging
4✔
60
import os
4✔
61
import re
4✔
62
from functools import total_ordering
4✔
63

64
from CedarBackup3.actions.util import findDailyDirs, getBackupFiles, writeIndicatorFile
4✔
65
from CedarBackup3.config import ByteQuantity, addByteQuantityNode, readByteQuantity
4✔
66
from CedarBackup3.util import changeOwnership, executeCommand, resolveCommand
4✔
67
from CedarBackup3.xmlutil import addContainerNode, createInputDom, readFirstChild
4✔
68

69
########################################################################
70
# Module-wide constants and variables
71
########################################################################
72

73
# Module-level logger used by every function in this extension.
logger = logging.getLogger("CedarBackup3.log.extend.split")
4✔
74

75
# Base command for the external split utility; handed to resolveCommand() before execution.
SPLIT_COMMAND = ["split"]
4✔
76
# Indicator file name passed to findDailyDirs() and writeIndicatorFile() by executeAction().
SPLIT_INDICATOR = "cback.split"
4✔
77

78

79
########################################################################
80
# SplitConfig class definition
81
########################################################################
82

83

84
@total_ordering
class SplitConfig:
    """
    Class representing split configuration.

    Split configuration is used for splitting staging directories.

    The following restrictions exist on data in this class:

       - The size limit must be a ByteQuantity
       - The split size must be a ByteQuantity

    Instances are ordered by size limit first and split size second.  ``None``
    compares as less than any instance.  Comparison against any other type is
    not supported: equality yields ``False`` and ordering raises ``TypeError``.
    """

    def __init__(self, sizeLimit=None, splitSize=None):
        """
        Constructor for the ``SplitConfig`` class.

        Args:
           sizeLimit: Size limit of the files, as a ``ByteQuantity``
           splitSize: Size that files exceeding the limit will be split into, as a ``ByteQuantity``

        Raises:
           ValueError: If one of the values is invalid
        """
        self._sizeLimit = None
        self._splitSize = None
        # Assign through the properties so that the values are validated.
        self.sizeLimit = sizeLimit
        self.splitSize = splitSize

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "SplitConfig(%s, %s)" % (self.sizeLimit, self.splitSize)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    @staticmethod
    def _isComparable(other):
        """
        Indicates whether ``other`` can be handed to ``__cmp__``.

        ``None`` is accepted because ``__cmp__`` handles it explicitly; anything
        else must be a ``SplitConfig`` so that the attribute reads cannot fail.
        """
        return other is None or isinstance(other, SplitConfig)

    def __eq__(self, other):
        """Equals operator, implemented in terms of original Python 2 compare operator."""
        if not self._isComparable(other):
            return NotImplemented  # let Python fall back instead of raising AttributeError
        return self.__cmp__(other) == 0

    def __lt__(self, other):
        """Less-than operator, implemented in terms of original Python 2 compare operator."""
        if not self._isComparable(other):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __gt__(self, other):
        """Greater-than operator, implemented in terms of original Python 2 compare operator."""
        if not self._isComparable(other):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __cmp__(self, other):
        """
        Original Python 2 comparison operator.

        Args:
           other: Other object to compare to (``None`` or a ``SplitConfig``)

        Returns:
            -1/0/1 depending on whether self is ``<``, ``=`` or ``>`` other
        """
        if other is None:
            return 1
        # A missing (falsy) value is ordered as a default-constructed ByteQuantity.
        if self.sizeLimit != other.sizeLimit:
            if (self.sizeLimit or ByteQuantity()) < (other.sizeLimit or ByteQuantity()):
                return -1
            return 1
        if self.splitSize != other.splitSize:
            if (self.splitSize or ByteQuantity()) < (other.splitSize or ByteQuantity()):
                return -1
            return 1
        return 0

    def _setSizeLimit(self, value):
        """
        Property target used to set the size limit.
        If not ``None``, the value must be a ``ByteQuantity`` object.

        Raises:
           ValueError: If the value is not a ``ByteQuantity``
        """
        if value is not None and not isinstance(value, ByteQuantity):
            raise ValueError("Value must be a ``ByteQuantity`` object.")
        self._sizeLimit = value

    def _getSizeLimit(self):
        """
        Property target used to get the size limit.
        """
        return self._sizeLimit

    def _setSplitSize(self, value):
        """
        Property target used to set the split size.
        If not ``None``, the value must be a ``ByteQuantity`` object.

        Raises:
           ValueError: If the value is not a ``ByteQuantity``
        """
        if value is not None and not isinstance(value, ByteQuantity):
            raise ValueError("Value must be a ``ByteQuantity`` object.")
        self._splitSize = value

    def _getSplitSize(self):
        """
        Property target used to get the split size.
        """
        return self._splitSize

    sizeLimit = property(_getSizeLimit, _setSizeLimit, None, doc="Size limit, as a ByteQuantity")
    splitSize = property(_getSplitSize, _setSplitSize, None, doc="Split size, as a ByteQuantity")
203

204

205
########################################################################
206
# LocalConfig class definition
207
########################################################################
208

209

210
@total_ordering
class LocalConfig:
    """
    Class representing this extension's configuration document.

    This is not a general-purpose configuration object like the main Cedar
    Backup configuration object.  Instead, it just knows how to parse and emit
    split-specific configuration values.  Third parties who need to read and
    write configuration related to this extension should access it through the
    constructor, ``validate`` and ``addConfig`` methods.

    Instances are ordered by their ``split`` value.  ``None`` compares as less
    than any instance.  Comparison against any other type is not supported:
    equality yields ``False`` and ordering raises ``TypeError``.
    """

    def __init__(self, xmlData=None, xmlPath=None, validate=True):
        """
        Initializes a configuration object.

        If you initialize the object without passing either ``xmlData`` or
        ``xmlPath`` then configuration will be empty and will be invalid until it
        is filled in properly.

        No reference to the original XML data or original path is saved off by
        this class.  Once the data has been parsed (successfully or not) this
        original information is discarded.

        Unless the ``validate`` argument is ``False``, the :any:`LocalConfig.validate`
        method will be called (with its default arguments) against configuration
        after successfully parsing any passed-in XML.  Keep in mind that even if
        ``validate`` is ``False``, it might not be possible to parse the passed-in
        XML document if lower-level validations fail.

        *Note:* It is strongly suggested that the ``validate`` option always be set
        to ``True`` (the default) unless there is a specific need to read in
        invalid configuration from disk.

        Args:
           xmlData (String data): XML data representing configuration
           xmlPath (Absolute path to a file on disk): Path to an XML file on disk
           validate (Boolean true/false): Validate the document after parsing it

        Raises:
           ValueError: If both ``xmlData`` and ``xmlPath`` are passed-in
           ValueError: If the XML data in ``xmlData`` or ``xmlPath`` cannot be parsed
           ValueError: If the parsed configuration document is not valid
        """
        self._split = None
        self.split = None
        if xmlData is not None and xmlPath is not None:
            raise ValueError("Use either xmlData or xmlPath, but not both.")
        if xmlData is None and xmlPath is not None:
            # Load the document from disk, then share the parsing path below.
            with open(xmlPath) as f:
                xmlData = f.read()
        if xmlData is not None:
            self._parseXmlData(xmlData)
            if validate:
                self.validate()

    def __repr__(self):
        """
        Official string representation for class instance.
        """
        return "LocalConfig(%s)" % (self.split,)

    def __str__(self):
        """
        Informal string representation for class instance.
        """
        return self.__repr__()

    @staticmethod
    def _isComparable(other):
        """
        Indicates whether ``other`` can be handed to ``__cmp__``.

        ``None`` is accepted because ``__cmp__`` handles it explicitly; anything
        else must be a ``LocalConfig`` so that the attribute read cannot fail.
        """
        return other is None or isinstance(other, LocalConfig)

    def __eq__(self, other):
        """Equals operator, implemented in terms of original Python 2 compare operator."""
        if not self._isComparable(other):
            return NotImplemented  # let Python fall back instead of raising AttributeError
        return self.__cmp__(other) == 0

    def __lt__(self, other):
        """Less-than operator, implemented in terms of original Python 2 compare operator."""
        if not self._isComparable(other):
            return NotImplemented
        return self.__cmp__(other) < 0

    def __gt__(self, other):
        """Greater-than operator, implemented in terms of original Python 2 compare operator."""
        if not self._isComparable(other):
            return NotImplemented
        return self.__cmp__(other) > 0

    def __cmp__(self, other):
        """
        Original Python 2 comparison operator.

        Args:
           other: Other object to compare to (``None`` or a ``LocalConfig``)

        Returns:
            -1/0/1 depending on whether self is ``<``, ``=`` or ``>`` other
        """
        if other is None:
            return 1
        if self.split != other.split:
            if self.split < other.split:
                return -1
            return 1
        return 0

    def _setSplit(self, value):
        """
        Property target used to set the split configuration value.
        If not ``None``, the value must be a ``SplitConfig`` object.

        Raises:
           ValueError: If the value is not a ``SplitConfig``
        """
        if value is not None and not isinstance(value, SplitConfig):
            raise ValueError("Value must be a ``SplitConfig`` object.")
        self._split = value

    def _getSplit(self):
        """
        Property target used to get the split configuration value.
        """
        return self._split

    split = property(_getSplit, _setSplit, None, "Split configuration in terms of a ``SplitConfig`` object.")

    def validate(self):
        """
        Validates configuration represented by the object.

        Split configuration must be filled in.  Within that, both the size limit
        and split size must be filled in.

        Raises:
           ValueError: If one of the validations fails
        """
        if self.split is None:
            raise ValueError("Split section is required.")
        if self.split.sizeLimit is None:
            raise ValueError("Size limit must be set.")
        if self.split.splitSize is None:
            raise ValueError("Split size must be set.")

    def addConfig(self, xmlDom, parentNode):
        """
        Adds a <split> configuration section as the next child of a parent.

        Third parties should use this function to write configuration related to
        this extension.  Nothing is added when there is no split configuration.

        We add the following fields to the document::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        Args:
           xmlDom: DOM tree as from ``impl.createDocument()``
           parentNode: Parent that the section should be appended to
        """
        if self.split is not None:
            sectionNode = addContainerNode(xmlDom, parentNode, "split")
            addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit)
            addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize)

    def _parseXmlData(self, xmlData):
        """
        Internal method to parse an XML string into the object.

        This method parses the XML document into a DOM tree (``xmlDom``) and then
        calls a static method to parse the split configuration section.

        Args:
           xmlData (String data): XML data to be parsed

        Raises:
           ValueError: If the XML cannot be successfully parsed
        """
        (_, parentNode) = createInputDom(xmlData)
        self._split = LocalConfig._parseSplit(parentNode)

    @staticmethod
    def _parseSplit(parent):
        """
        Parses a split configuration section.

        We read the following individual fields::

           sizeLimit      //cb_config/split/size_limit
           splitSize      //cb_config/split/split_size

        Args:
           parent: Parent node to search beneath

        Returns:
            ``SplitConfig`` object or ``None`` if the section does not exist

        Raises:
           ValueError: If some filled-in value is invalid
        """
        split = None
        section = readFirstChild(parent, "split")
        if section is not None:
            split = SplitConfig()
            split.sizeLimit = readByteQuantity(section, "size_limit")
            split.splitSize = readByteQuantity(section, "split_size")
        return split
413

414

415
########################################################################
416
# Public functions
417
########################################################################
418

419
###########################
420
# executeAction() function
421
###########################
422

423

424
def executeAction(configPath, options, config):  # noqa: ARG001
    """
    Executes the split backup action.

    The extension's own configuration is loaded from ``configPath``.  Every
    daily directory returned by ``findDailyDirs`` for the staging target
    directory is split, and then the split indicator file is written into it.

    Args:
       configPath (String representing a path on disk): Path to configuration file on disk
       options (Options object): Program command-line options (unused)
       config (Config object): Program configuration

    Raises:
       ValueError: If the options or stage configuration section is missing
       ValueError: If the split configuration cannot be loaded or is invalid
       IOError: If there are I/O problems reading or writing files
    """
    logger.debug("Executing split extended action.")
    if not (config.options is not None and config.stage is not None):
        raise ValueError("Cedar Backup configuration is not properly filled in.")
    splitConfig = LocalConfig(xmlPath=configPath)
    for stagedDir in findDailyDirs(config.stage.targetDir, SPLIT_INDICATOR):
        owner = config.options.backupUser
        group = config.options.backupGroup
        _splitDailyDir(stagedDir, splitConfig.split.sizeLimit, splitConfig.split.splitSize, owner, group)
        # Mark the directory as done only after all of its files were handled.
        writeIndicatorFile(stagedDir, SPLIT_INDICATOR, owner, group)
    logger.info("Executed the split extended action successfully.")
447

448

449
##############################
450
# _splitDailyDir() function
451
##############################
452

453

454
def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
    """
    Splits large files in a daily staging directory.

    The candidate files come from ``getBackupFiles``, which leaves out
    indicator files.  Each candidate whose size on disk is greater than
    ``sizeLimit`` is split into pieces of ``splitSize`` and the original file
    is removed afterwards.

    Args:
       dailyDir: Daily directory whose files should be split
       sizeLimit: Size limit that a file must exceed to be split
       splitSize: Size of the pieces, passed through to ``_splitFile``
       backupUser: User that target files should be owned by
       backupGroup: Group that target files should be owned by
    """
    logger.debug("Begin splitting contents of [%s].", dailyDir)
    for candidate in getBackupFiles(dailyDir):  # indicator files are not included
        if float(os.stat(candidate).st_size) > sizeLimit:
            _splitFile(candidate, splitSize, backupUser, backupGroup, removeSource=True)
    logger.debug("Completed splitting contents of [%s].", dailyDir)
480

481

482
########################
483
# _splitFile() function
484
########################
485

486

487
def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
    """
    Splits the source file into chunks of the indicated size.

    The chunks are written next to the source file and are named
    ``<filename>_NNNNN``, with a five-digit numeric suffix starting at zero.
    The split files will be owned by the indicated backup user and group.  If
    ``removeSource`` is ``True``, then the source file will be removed after it is
    successfully split.

    The process working directory is changed while the split command runs and
    is always restored before returning.

    Args:
       sourcePath: Absolute path of the source file to split
       splitSize: Size of each chunk, as an object with a ``bytes`` attribute
       backupUser: User that target files should be owned by
       backupGroup: Group that target files should be owned by
       removeSource: Indicates whether to remove the source file

    Raises:
       ValueError: If the source path does not exist
       IOError: If there is a problem splitting or removing the source file
    """
    cwd = os.getcwd()
    try:
        if not os.path.exists(sourcePath):
            raise ValueError("Source path [%s] does not exist." % sourcePath)
        # Pin the location before changing directory, so that a relative path is
        # not re-resolved against the new working directory later on.  An
        # absolute path is returned unchanged by join().
        absolutePath = os.path.join(cwd, sourcePath)
        dirname = os.path.dirname(absolutePath)
        filename = os.path.basename(absolutePath)
        prefix = "%s_" % filename
        chunkBytes = int(splitSize.bytes)
        os.chdir(dirname)  # need to operate from directory that we want files written to
        command = resolveCommand(SPLIT_COMMAND)
        args = ["--verbose", "--numeric-suffixes", "--suffix-length=5", "--bytes=%d" % chunkBytes, filename, prefix]
        (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False)
        if result != 0:
            raise OSError("Error [%d] calling split for [%s]." % (result, sourcePath))
        if not output:
            raise OSError("Unable to parse output from split command.")
        # The prefix comes from a file name, so it must be matched literally.
        pattern = re.compile(r"(creating file [`'])(%s)(.*)(')" % re.escape(prefix))
        match = pattern.search(output[-1])
        if match is None:
            raise OSError("Unable to parse output from split command.")
        # The last line names the final chunk; suffixes start at zero, so the
        # chunk numbered lastIndex itself must be checked as well.
        lastIndex = int(match.group(3).strip())
        for index in range(lastIndex + 1):
            path = "%s%05d" % (prefix, index)
            if not os.path.exists(path):
                raise OSError("After call to split, expected file [%s] does not exist." % path)
            changeOwnership(path, backupUser, backupGroup)
        if removeSource and os.path.exists(absolutePath):
            try:
                os.remove(absolutePath)
            except OSError as err:
                raise OSError("Failed to remove file [%s] after splitting it." % (sourcePath)) from err
            logger.debug("Completed removing old file [%s].", sourcePath)
    finally:
        os.chdir(cwd)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc