• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Clinical-Genomics / cg / 15848903735

24 Jun 2025 11:13AM UTC coverage: 85.73%. First build
15848903735

Pull #4227

github

web-flow
Merge ada4f0f02 into 77d984373
Pull Request #4227: DEV - Start pipelines

969 of 1136 new or added lines in 46 files covered. (85.3%)

27624 of 32222 relevant lines covered (85.73%)

0.86 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.71
/cg/models/compression_data.py
1
"""Class to hold file information about a compression entity"""
2

3
import logging
1✔
4
import os
1✔
5
from datetime import date, datetime
1✔
6
from pathlib import Path
1✔
7

8
from cg.apps.crunchy.files import check_if_update_spring, get_crunchy_metadata, get_file_updated_at
1✔
9
from cg.apps.crunchy.models import CrunchyMetadata
1✔
10
from cg.constants import FASTQ_FIRST_READ_SUFFIX, FASTQ_SECOND_READ_SUFFIX, FileExtensions
1✔
11
from cg.constants.compression import PENDING_PATH_SUFFIX
1✔
12

13
LOG = logging.getLogger(__name__)
1✔
14

15

16
class CompressionData:
1✔
17
    """Holds information about compression data"""
18

19
    def __init__(self, stub: Path = None):
1✔
20
        """Initialise a compression data object
21

22
        The stub is first part of the file name
23
        """
24
        self.stub = stub
1✔
25
        self.stub_string = str(self.stub)
1✔
26

27
    @property
1✔
28
    def pending_path(self) -> Path:
1✔
29
        """Return the path to a compression pending file"""
30
        return self.stub.with_suffix(PENDING_PATH_SUFFIX)
1✔
31

32
    @property
1✔
33
    def spring_path(self) -> Path:
1✔
34
        """Return the path to a SPRING file"""
35
        return self.stub.with_suffix(FileExtensions.SPRING)
1✔
36

37
    @property
1✔
38
    def encrypted_spring_path(self) -> Path:
1✔
39
        """Return the path to a SPRING file"""
40
        return self.stub.with_suffix(FileExtensions.SPRING).with_suffix(FileExtensions.GPG)
×
41

42
    @property
1✔
43
    def spring_metadata_path(self) -> Path:
1✔
44
        """Return the path to a SPRING metadata file"""
45
        return self.stub.with_suffix(".json")
1✔
46

47
    @property
1✔
48
    def analysis_dir(self) -> Path:
1✔
49
        """Return the path to folder where analysis is"""
50
        return self.stub.resolve().parent
1✔
51

52
    @property
1✔
53
    def fastq_first(self) -> Path:
1✔
54
        """Return the path to the first read in pair"""
55
        return Path(self.stub_string + FASTQ_FIRST_READ_SUFFIX)
1✔
56

57
    @property
1✔
58
    def fastq_second(self) -> Path:
1✔
59
        """Return the path to the second read in pair"""
60
        return Path(self.stub_string + FASTQ_SECOND_READ_SUFFIX)
1✔
61

62
    @property
1✔
63
    def run_name(self) -> str:
1✔
64
        """Return the name of the sequencing run identifier"""
65
        return self.stub.name
1✔
66

67
    def pair_exists(self) -> bool:
1✔
68
        """Check that both files in FASTQ pair exists"""
69
        LOG.info("Check if FASTQ pair exists")
1✔
70
        if not self.file_exists_and_is_accessible(self.fastq_first):
1✔
71
            return False
1✔
72
        return bool(self.file_exists_and_is_accessible(self.fastq_second))
1✔
73

74
    @staticmethod
1✔
75
    def is_absolute(file_path: Path) -> bool:
1✔
76
        """Check if file path can be resolved"""
77
        if not file_path.is_absolute():
1✔
78
            LOG.info("Could not resolve full path from HK to %s", file_path)
×
79
            return False
×
80
        return True
1✔
81

82
    @staticmethod
1✔
83
    def file_exists_and_is_accessible(file_path: Path) -> bool:
1✔
84
        """Check if file exists and is accesible"""
85
        try:
1✔
86
            if not file_path.exists():
1✔
87
                LOG.info("%s does not exist", file_path)
1✔
88
                return False
1✔
89
        except PermissionError:
×
90
            LOG.warning("Not permitted to access %s. Skipping", file_path)
×
91
            return False
×
92
        return True
1✔
93

94
    @staticmethod
1✔
95
    def is_symlink(file_path: Path) -> bool:
1✔
96
        """Check if file path is symbolik link"""
97
        LOG.info("Check if %s is a symlink", file_path)
×
98
        return os.path.islink(file_path)
×
99

100
    @staticmethod
1✔
101
    def get_change_date(file_path: Path) -> datetime:
1✔
102
        """Return the time when this file was changed"""
103
        changed_date = datetime.fromtimestamp(file_path.stat().st_mtime)
1✔
104
        LOG.info("File %s was changed %s", file_path, changed_date)
1✔
105
        return changed_date
1✔
106

107
    def spring_exists(self) -> bool:
1✔
108
        """Check if the SPRING file exists"""
109
        LOG.info("Check if SPRING archive file exists")
1✔
110
        return self.file_exists_and_is_accessible(self.spring_path)
1✔
111

112
    def metadata_exists(self) -> bool:
1✔
113
        """Check if the SPRING metadata file exists"""
114
        LOG.info("Check if SPRING metadata file exists")
1✔
115
        return self.file_exists_and_is_accessible(self.spring_metadata_path)
1✔
116

117
    def pending_exists(self) -> bool:
1✔
118
        """Check if the SPRING pending flag file exists"""
119
        LOG.info("Check if pending compression file exists")
1✔
120
        return self.file_exists_and_is_accessible(self.pending_path)
1✔
121

122
    @property
1✔
123
    def is_compression_pending(self) -> bool:
1✔
124
        """Check if compression/decompression has started but not finished."""
125
        if self.pending_exists():
1✔
126
            LOG.info(f"Compression/decompression is pending for {self.run_name}")
1✔
127
            return True
1✔
128
        LOG.info("Compression/decompression is not running")
1✔
129
        return False
1✔
130

131
    @property
1✔
132
    def is_fastq_compression_possible(self) -> bool:
1✔
133
        """Check if FASTQ compression is possible.
134

135
        - Compression is running          -> Compression NOT possible
136
        - SPRING file exists on Hasta     -> Compression NOT possible
137
        - Data is external                -> Compression NOT possible
138
        - Not compressed and not running  -> Compression IS possible
139
        """
140
        if self.is_compression_pending:
1✔
141
            return False
1✔
142

143
        if self.spring_exists():
1✔
144
            LOG.debug("SPRING file found")
1✔
145
            return False
1✔
146

147
        if "external-data" in str(self.fastq_first):
1✔
148
            LOG.debug("File is external data and should not be compressed")
×
149
            return False
×
150

151
        LOG.debug("FASTQ compression is possible")
1✔
152

153
        return True
1✔
154

155
    @property
1✔
156
    def is_spring_decompression_possible(self) -> bool:
1✔
157
        """Check if SPRING decompression is possible.
158

159
        There are three possible answers to this question:
160

161
            - Compression/Decompression is running      -> Decompression is NOT possible
162
            - The FASTQ files are not compressed        -> Decompression is NOT possible
163
            - Compression has been performed            -> Decompression IS possible
164

165
        """
166
        if self.pending_exists():
1✔
167
            LOG.info(f"Compression/decompression is pending for {self.run_name}")
1✔
168
            return False
1✔
169

170
        if not self.spring_exists():
1✔
171
            LOG.info("No SPRING file found")
1✔
172
            return False
1✔
173

174
        if self.pair_exists():
1✔
175
            LOG.info("FASTQ files already exists")
1✔
176
            return False
1✔
177

178
        LOG.info("Decompression is possible")
1✔
179

180
        return True
1✔
181

182
    @property
1✔
183
    def is_fastq_compression_done(self) -> bool:
1✔
184
        """Check if FASTQ compression is finished.
185

186
        This is checked by controlling that the SPRING files that are produced after FASTQ
187
        compression exists.
188

189
        The following has to be fulfilled for FASTQ compression to be considered done:
190

191
            - A SPRING archive file exists
192
            - A SPRING archive metadata file exists
193
            - The SPRING archive has not been unpacked before FASTQ delta (21 days)
194

195
        Note:
196
        'updated_at' indicates at what date the SPRING archive was unarchived last.
197
        If the SPRING archive has never been unarchived 'updated_at' is None.
198

199
        """
200
        LOG.info("Check if FASTQ compression is finished")
1✔
201
        LOG.info(f"Check if SPRING file {self.spring_path} exists")
1✔
202
        if not self.spring_exists():
1✔
203
            LOG.info(
1✔
204
                f"No SPRING file for {self.run_name}",
205
            )
206
            return False
1✔
207
        LOG.info("SPRING file found")
1✔
208

209
        LOG.info(f"Check if SPRING metadata file {self.spring_metadata_path} exists")
1✔
210
        if not self.metadata_exists():
1✔
211
            LOG.info("No metadata file found")
1✔
212
            return False
1✔
213
        LOG.info("SPRING metadata file found")
1✔
214

215
        # We want this to raise exception if file is malformed
216
        crunchy_metadata: CrunchyMetadata = get_crunchy_metadata(self.spring_metadata_path)
1✔
217

218
        # Check if the SPRING archive has been unarchived
219
        updated_at: date | None = get_file_updated_at(crunchy_metadata)
1✔
220
        if updated_at is None:
1✔
221
            LOG.info(f"FASTQ compression is done for {self.run_name}")
1✔
222
            return True
1✔
223

224
        LOG.info(f"Files where unpacked {updated_at}")
1✔
225

226
        if not check_if_update_spring(updated_at):
1✔
227
            return False
1✔
228

229
        LOG.info(f"FASTQ compression is done for {self.run_name}")
1✔
230

231
        return True
1✔
232

233
    @property
1✔
234
    def is_spring_decompression_done(self) -> bool:
1✔
235
        """Check if SPRING decompression if finished.
236

237
        This means that all three files specified in SPRING metadata should exist.
238
        That is
239

240
            - First read in FASTQ pair should exist
241
            - Second read in FASTQ pair should exist
242
            - SPRING archive file should still exist
243
        """
244

245
        spring_metadata_path: Path = self.spring_metadata_path
1✔
246
        LOG.info(f"Check if SPRING metadata file {spring_metadata_path} exists")
1✔
247

248
        if not self.metadata_exists():
1✔
249
            LOG.info("No SPRING metadata file found")
1✔
250
            return False
1✔
251

252
        # We want this to exit hard if the metadata is malformed
253
        crunchy_metadata: CrunchyMetadata = get_crunchy_metadata(spring_metadata_path)
1✔
254

255
        for file_info in crunchy_metadata.files:
1✔
256
            if not Path(file_info.path).exists():
1✔
257
                LOG.info(f"File {file_info.path} does not exist")
1✔
258
                return False
1✔
259
            if not file_info.updated:
1✔
260
                LOG.info("Files have not been unarchived")
1✔
261
                return False
1✔
262

263
        LOG.info(f"SPRING decompression is done for run {self.run_name}")
1✔
264
        return True
1✔
265

266
    def __str__(self):
1✔
267
        return f"CompressionData(run:{self.run_name})"
×
268

269
    def __repr__(self):
1✔
270
        return f"CompressionData(stub:{self.stub})"
×
271

272

273
class SampleCompressionData:
    """Aggregate the compression status of all compression objects of one sample."""

    def __init__(self, sample_id: str, compression_objects: list[CompressionData]):
        """Store the sample id together with the sample's compression objects."""
        self.sample_id = sample_id
        self.compression_objects = compression_objects

    def is_decompression_needed(self) -> bool:
        """Return True if any entity is neither pending nor has its FASTQ pair in place."""
        LOG.debug(f"Checking if decompression is needed for {self.sample_id}.")
        for entity in self.compression_objects:
            # A pending entity is skipped without looking for its FASTQ pair.
            if entity.is_compression_pending:
                continue
            if not entity.pair_exists():
                return True
        return False

    def is_spring_decompression_running(self) -> bool:
        """Return True if any entity of the sample has compression/decompression pending."""
        for entity in self.compression_objects:
            if entity.is_compression_pending:
                return True
        return False

    def can_be_decompressed(self) -> bool:
        """Return True if at least one SPRING file of the sample can be decompressed."""
        decompressible = (
            True for entity in self.compression_objects if entity.is_spring_decompression_possible
        )
        return next(decompressible, False)
301

302

303
class CaseCompressionData:
    """Aggregate the compression status of all samples of one case."""

    def __init__(self, case_id: str, sample_compression_data: list[SampleCompressionData]):
        """Store the case id together with the per-sample compression data."""
        self.case_id = case_id
        self.sample_compression_data = sample_compression_data

    def is_spring_decompression_needed(self) -> bool:
        """Return True if at least one sample of the case needs decompression."""
        for sample in self.sample_compression_data:
            if sample.is_decompression_needed():
                return True
        return False

    def is_spring_decompression_running(self) -> bool:
        """Return True if at least one sample of the case is being decompressed."""
        for sample in self.sample_compression_data:
            if sample.is_spring_decompression_running():
                return True
        return False

    def can_at_least_one_sample_be_decompressed(self) -> bool:
        """Return True if at least one sample can be decompressed, otherwise False."""
        decompressible = (
            True for sample in self.sample_compression_data if sample.can_be_decompressed()
        )
        return next(decompressible, False)
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc